1
0
mirror of https://0xacab.org/jvoisin/mat2 synced 2025-10-06 08:32:52 +02:00

466 Commits

Author SHA1 Message Date
jvoisin
235403bc11 Edit README.md 2025-09-04 15:10:12 +02:00
jvoisin
102f08cd28 Switch the project from 0xacab to github
While the folks running 0xacab are much more lovely than the github ones, this
project has outgrown the former:

- Github offers beefy continuous integration, making it easier to run the
  testsuite on every python version, instead of using a weird docker-based
  contraption. Moreover, I'd rather burn some Microsoft money than 0xacab one.
- Opening an account on 0xacab is non-trivial (by design), making it tedious
  for people to report issues and contribute to mat2.
- Gitlab is becoming unbearably slow and convoluted, even compared to Github's
  awful Copilot/AI push.

It's a sad state of affairs, but it's a pragmatic decision. People who don't
have a Github account can still report issues and send patches by sending me an
email.
2025-09-04 14:35:36 +02:00
jvoisin
7a8ea224bc Fix issue introduced in f073444
The continuous integration on 0xacab didn't run, so it didn't catch this issue.
It seems like we'll have to move to github or whatever instead, sigh.
2025-09-01 23:52:43 +02:00
jvoisin
504efb2448 Remove mypy from the CI
It has always been useless at best, and a nuisance most of the time.
2025-09-01 14:35:25 +02:00
jvoisin
f07344444d Fix a broken test
Reported-By: https://github.com/NixOS/nixpkgs/issues/436421
2025-08-25 12:07:15 +02:00
jvoisin
473903b70e Fix HEIC parsing with the latest exiftool 2025-04-03 17:34:44 +02:00
jvoisin
1438cf7bd4 Disable webp tests for now
```
======================================================================
ERROR: test_all_parametred (tests.test_libmat2.TestCleaning.test_all_parametred) (case={'name': 'webp', 'parser': <class 'libmat2.images.WEBPParser'>, 'meta': {'Warning': '[minor] Improper EXIF header'}, 'expected_meta': {}})
----------------------------------------------------------------------
Traceback (most recent call last):
  File "/builds/jvoisin/mat2/libmat2/images.py", line 109, in __init__
    GdkPixbuf.Pixbuf.new_from_file(self.filename)
    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^
gi.repository.GLib.GError: gdk-pixbuf-error-quark: Couldn’t recognize the image file format for file “./tests/data/clean.webp” (3)
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
  File "/builds/jvoisin/mat2/tests/test_libmat2.py", line 557, in test_all_parametred
    p1 = case['parser'](target)
  File "/builds/jvoisin/mat2/libmat2/images.py", line 111, in __init__
    raise ValueError
ValueError
```

Pending on https://0xacab.org/georg/mat2-ci-images/-/issues/14
2025-04-03 17:34:40 +02:00
jvoisin
e740a9559f Properly handle an exception
```
Traceback (most recent call last):
  File "/builds/jvoisin/mat2/tests/test_deep_cleaning.py", line 147, in test_office
    meta = p.get_meta()
  File "/builds/jvoisin/mat2/libmat2/archive.py", line 155, in get_meta
    zin.extract(member=item, path=temp_folder)
    ~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.13/zipfile/__init__.py", line 1762, in extract
    return self._extract_member(member, path, pwd)
           ~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.13/zipfile/__init__.py", line 1829, in _extract_member
    os.makedirs(upperdirs, exist_ok=True)
    ~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "<frozen os>", line 227, in makedirs
OSError: [Errno 28] No space left on device: '/tmp/tmptl1ibyv6/word/theme'
```

This should never happen™, but just in case…
2025-04-03 15:24:34 +02:00
Vincent Deffontaines
2b58eece50 Add webp support 2025-03-18 22:20:17 +01:00
georg
29f404bce3 CI: run tests via python3.{13,14} 2025-01-09 09:52:47 +00:00
jvoisin
6c966f2afa Significantly improve portability 2025-01-09 02:36:16 +01:00
jvoisin
70d236a062 Bump the changelog 2025-01-09 00:43:12 +01:00
Alex Marchant
d61fb7f77a Wait to remove elements until they are all processed 2024-09-13 14:28:57 +02:00
jvoisin
1aed4ff2a5 Catch a MemoryError in cairo
This should close #202
2024-09-13 14:28:50 +02:00
matiargs
75c0a750c1 Keep orientation metadata 2024-07-18 15:04:24 +00:00
jvoisin
a47ac01eb6 Remove a duplicate function
This is a leftover from today's best-effort merges.
2024-04-05 19:51:14 +02:00
Alex Marchant
156855ab7e Remove dangling references from document.xml.rels
The file `word/_rels/document.xml.rels` is similar to `[Content_Types].xml` and
has references to other files in the archive. If those references aren't
removed, Word refuses to open the document.
2024-04-05 18:45:58 +02:00
jvoisin
09672a2dcc Merge branch 'alexmarchant-utf-8-encode-all' 2024-04-05 18:33:30 +02:00
Alex Marchant
f2c898c92d Strip comment references from document.xml 2024-04-05 18:31:49 +02:00
Alex Marchant
f931a0ecee Make utf-8 explicit in all tree.write calls 2024-04-03 15:27:48 -04:00
Alex Marchant
61f39c4bd0 Strip comment references from document.xml 2024-04-03 15:20:00 -04:00
Alex Marchant
1b9ce34e2c Add test that checks if comments.xml is removed without errors 2024-04-03 15:03:33 -04:00
Alex Marchant
17e76ab6f0 Update comments file regex 2024-04-03 14:49:39 -04:00
jvoisin
94ef57c994 Add python3.12 in the CI 2024-01-02 02:50:44 +00:00
jvoisin
05d1ca5841 Improve the pyproject.toml file
Prompted by !113
2023-12-31 18:34:39 +01:00
jvoisin
55b468ded7 Update Arch Linux package URL in INSTALL.md
Patch by https://github.com/felixonmars
2023-11-21 12:27:45 +01:00
jvoisin
0fcafa2edd Raise a ValueError for invalid FLAC files to please mypy 2023-11-13 15:03:42 +01:00
Romain Vigier
7405955ab5 parsers: Inherit the sandbox option when creating additional parsers 2023-11-13 13:11:35 +01:00
Romain Vigier
e6564509e1 mat2: Fix the --no-sandbox argument
The --no-sandbox argument was parsed incorrectly, meaning no sandbox was
used when it was absent and the sandbox being used when it was present.
2023-11-13 13:06:38 +01:00
jvoisin
bbd5b2817c Fix the CI on Debian 2023-11-08 15:44:33 +01:00
jvoisin
73f2a87aa0 Provide a name for the loggers 2023-09-08 22:16:45 +02:00
jvoisin
abcdf07ef4 Properly handle a cairo exception 2023-09-07 16:31:34 +02:00
Rui Chen
a3081bce47 setup: use share/man/man1 for man1 2023-08-31 19:44:28 -04:00
georg
47d5529840 tests: drop duplicate dirty.epub file; it's stored below data/ as well 2023-08-03 13:42:15 +00:00
jvoisin
fa44794dfd Fix the project name in pyproject.toml 2023-08-02 21:21:44 +02:00
jvoisin
04786d75da Bump the changelog 2023-08-02 21:09:12 +02:00
jvoisin
cb7b5747a8 Add the manpage to the PyPI package
This should close #192
2023-07-11 22:03:56 +02:00
Jason Smalls
8c26020f67 Add more files to ignore for MSOffice documents 2023-07-11 21:38:22 +02:00
Jason Smalls
a0c97b25c4 Add a variant mimetype for bmp 2023-07-11 21:35:04 +02:00
Jason Smalls
1bcb945360 Harden get_meta in archive.py against variants of CVE-2022-35410 2023-07-11 21:31:53 +02:00
jvoisin
9159fe8705 Mention wp-mat in the readme 2023-06-05 19:52:13 +02:00
jvoisin
1b9608aecf Use proper type annotations instead of comments 2023-05-03 22:28:02 +02:00
jvoisin
2ac8c24dac Make use of is_dir/isdir for archives 2023-05-03 22:19:19 +02:00
jvoisin
71ecac85b0 Add some documentation about OSX 2023-04-11 21:35:25 +02:00
georg
b9677d8655 CI: codespell: drop obsolete list of ignored words
codespell was dropped via a63011b3f6.
Accordingly, this commit does some cleanup.
2023-03-21 13:18:54 +00:00
georg
6fde80d3e3 CI: shallow clone repository and limit depth to 5
The previous commit changed the strategy to 'clone', instead of 'fetch'
as before. While this fixes permission errors, it is also slower, as an
existing checkout of the repository will be ignored. To overcome this,
this commit limits the depth to 5.
2023-03-20 15:11:02 +00:00
georg
6c05360afa CI: 'clone' git repository instead of 'fetch'
While the former is slower, the latter might lead to errors such as
"fatal: detected dubious ownership in repository at" which is fixed in
GitLab upstream via
https://gitlab.com/gitlab-org/gitlab-runner/-/merge_requests/3538, but
not yet released.

Closes #191
2023-03-20 15:10:56 +00:00
georg
596696dfbc CI: Add python3.{7,8,9,10,11} test jobs
Closes #187
2023-03-15 23:38:39 +00:00
jvoisin
daa17a3e9c Fix the CI on Archlinux 2023-03-12 13:29:46 +01:00
Gu1nn3zz
6061f47231 fix: Typing in the parser factory 2023-03-07 17:37:56 +00:00
georg
8b41764a3e CI: linting: ruff: specify image
Otherwise, this job might fail, depending on the runner which executes
the job, due to different configurations, especially wrt the default
image.

Ref https://0xacab.org/jvoisin/mat2/-/merge_requests/105
2023-03-07 11:25:17 +00:00
Rui Chen
ed0ffa5693 Update pyproject.toml to include version 2023-02-24 09:12:06 +00:00
jvoisin
b1c03bce72 Bump the changelog 2023-02-23 21:36:46 +01:00
jvoisin
a63011b3f6 Improve the CI
- Remove some useless linters
- Make use of ruff
2023-02-20 21:15:07 +01:00
jvoisin
e41390eb64 Explicitly pass a parameter to functools.lru_cache 2023-01-31 20:42:39 +01:00
jvoisin
66a36f6b15 Bump the changelog 2023-01-28 17:55:02 +01:00
jvoisin
3cb3f58084 Another typing pass 2023-01-28 17:22:26 +01:00
jvoisin
39fb254e01 Fix the type annotations 2023-01-28 15:57:20 +00:00
jvoisin
1f73a16ef3 imghdr is deprecated 2023-01-14 15:38:12 +01:00
jvoisin
e8b38f1101 Revert "Simplify a bit the typing annotations of ./mat2"
This reverts commit 29057d6cdf.
2023-01-14 15:35:21 +01:00
jvoisin
8d7230ba16 Fix -l output 2023-01-07 17:10:02 +01:00
jvoisin
2b02c82e7f Bump the changelog 2023-01-07 16:52:58 +01:00
Megamind
b00e221675 Make the 'A' in the README ascii art look more "A-like" 2022-12-30 20:05:33 +00:00
jvoisin
62a45c29df Improve xlsx support 2022-12-25 18:05:13 +01:00
jvoisin
6479d869e4 Remove the Nautilus extension 2022-12-05 20:31:12 +01:00
jvoisin
29057d6cdf Simplify a bit the typing annotations of ./mat2 2022-11-21 19:58:53 +01:00
jvoisin
180ea24e5a Remove pyflakes
It's borderline useless compared to mypy and pylint
2022-11-21 19:57:38 +01:00
jvoisin
618e0a8e39 Fix the tests on the latest Debian 2022-10-09 21:49:07 +02:00
jvoisin
6d93cf9397 Remove deprecated pylint checks 2022-10-09 21:14:20 +02:00
jvoisin
b1a16b334f Get rid of a deprecated check
Nobody should be using Poppler < 0.46
2022-10-09 21:11:02 +02:00
jvoisin
0501359600 Please pylint 2022-10-09 21:04:19 +02:00
jvoisin
cc5be8608b Simplify the typing annotations 2022-08-28 22:29:06 +02:00
b068931cc450442b 63f5b3d276ea4297
292f44c086 update source and installation 2022-08-24 20:20:02 +02:00
jvoisin
2dd196c2c7 Make use of cache to get binary paths 2022-08-05 20:43:37 +02:00
jvoisin
34eb878aae Add the CVE number to the changelog 2022-07-08 22:09:22 +02:00
jvoisin
eec5c33a6b Bump the changelog 2022-07-06 19:20:21 +02:00
jvoisin
beebca4bf1 Prevent arbitrary file read via zip archives
A zip file with a file pointing to /etc/passwd would, upon being cleaned by
mat2, produce a file with the filesystem's /etc/passwd file.
2022-07-05 16:27:07 +02:00
jvoisin
e2c4dbf721 Show a scary message in case of path traversal attempt 2022-07-05 15:30:10 +02:00
jvoisin
704367f91e Add support for HEIC files
Thanks to Maxime Morin ( https://www.maijin.fr/ )
for the patch.
2022-05-15 18:57:27 +02:00
jvoisin
2639713709 Minor cleanup 2022-05-05 22:00:09 +02:00
jvoisin
b18e6e11f0 Bump the changelog 2022-04-30 17:31:29 +02:00
jvoisin
62dc8c71c1 Enable gitlab's SAST 2022-04-02 16:19:13 +02:00
jvoisin
697e9583b9 Please the linters 2022-03-29 22:18:06 +02:00
jvoisin
1b37604d3a Make processing multiple files safer concurrence-wise 2022-03-29 22:15:04 +02:00
jvoisin
1c3e2afa1e Escape more control chars in the cli 2022-03-29 22:13:55 +02:00
jvoisin
05b8e97b68 Simplification of the testsuite 2022-03-29 22:13:33 +02:00
jvoisin
2a74a400e2 Fix the svg tests on archlinux 2022-03-28 23:22:42 +02:00
jvoisin
5ccddae7f5 Fix the PDF version
This should prevent the testsuite from breaking,
and marginally increase fingerprinting resistance.
2022-03-28 22:34:57 +02:00
jvoisin
12582ba2f5 Try to use modern rsvg functions when we can 2022-03-16 20:23:49 +01:00
jvoisin
35092562e6 Mention dangerzone 2022-01-06 18:31:34 +01:00
jvoisin
e5dcd39225 Bump the changelog 2022-01-06 17:00:22 +01:00
jvoisin
660f0dea73 Fix the dolphin integration
Kudos to Miguel Angel Marco Buzunariz for the original patch.
2022-01-05 13:54:50 +01:00
jvoisin
cd2b9af902 Fix the Debian CI
This should fix #162
2021-12-26 16:11:26 +01:00
jvoisin
3378f3ab8c Please pylint by iterating on dict directly, instead of calling .keys() 2021-12-26 15:23:26 +01:00
jvoisin
48680b9852 Add a fuzzer based on atheris 2021-12-19 22:37:45 +01:00
jvoisin
d555a02c90 Increase audio processing robustness 2021-12-19 22:33:28 +01:00
jvoisin
143bb0a5f3 Add a check for weird audio files, instead of crashing 2021-12-18 19:43:21 +01:00
jvoisin
a1a7c76dc9 Make mat2 more robust wrt. weird audio files 2021-12-14 23:30:13 +01:00
jvoisin
01b39aa68c Make libmat2 more robust against corrupted zip files 2021-12-13 19:44:44 +01:00
jvoisin
e312868c4e Increase a bit the robustness of mat2
Those issues were found via https://github.com/google/atheris
2021-12-13 19:00:41 +01:00
Denis 'GNUtoo' Carikli
b71bafd2cf CONTRIBUTING.md: Update information about tests
Contributors are now supposed to run tests in the GitLab instance, so
this also needs to be reflected in the CONTRIBUTING.md as otherwise
people not used to forge workflows could completely miss that
information.

Signed-off-by: Denis 'GNUtoo' Carikli <GNUtoo@cyberdimension.org>
2021-12-09 17:16:34 +01:00
jvoisin
22199df4d0 Please the linters wrt. the previous commit 2021-12-09 16:58:24 +01:00
Denis 'GNUtoo' Carikli
1703ed6ebb zip archives: keep individual files compression type
While hardcoding the compression to zipfile.ZIP_DEFLATED works for
most use cases of mat, being able to produce cleaned up uncompressed
zip files is useful for content that cannot be compressed more.

In addition, it also enables using mat2 for reproducible builds of
Android bootanimation files that don't support compression.

Signed-off-by: Denis 'GNUtoo' Carikli <GNUtoo@cyberdimension.org>
2021-12-08 18:36:39 +01:00
jvoisin
541b3c83b2 We're not in beta anymore. 2021-11-23 17:39:19 +01:00
jvoisin
6afb0cb9d8 Please the linter, again. 2021-11-23 17:36:17 +01:00
jvoisin
1c4e98425a Make the linter happier 2021-11-23 17:34:14 +01:00
jvoisin
fb7440ab5d Please a bit the CI 2021-11-21 11:02:22 +01:00
jvoisin
0c91ac7367 Implement code for internationalization 2021-11-12 20:10:57 +01:00
jvoisin
708841f9f5 Fix the coverage 2021-10-03 19:57:44 +02:00
jvoisin
d4479d9baa Bump the changelog 2021-08-29 13:33:47 +02:00
georg
08a5792a9a libmat2/pdf: Drop printing 'lol'
It seems this was committed without intention to do so.
2021-07-29 15:40:37 +00:00
Antonio Eugenio Burriel
3b094ae449 Fix pdf issues on printers
pyCairo by default renders the PDF surfaces with a resolution of 72
dpi which is so low that the bitmap gets blurred compared to original.

Since pyCairo 1.12.0, a new method set_device_scale(x_scale, y_scale)
is added, which allows changing the canvas resolution.
2021-07-25 14:12:57 +02:00
jvoisin
0b094b594b Improve xlsx support
This should close #156
2021-07-14 23:34:02 +02:00
jvoisin
8c1107c358 Make cairo behave in a less idiotic way
Because raising errors when unable to process
stuff instead of an exception is dumb.
2021-06-21 22:39:45 +02:00
jvoisin
6df615281b Fix the CI for recent exiftool versions
Always a joy to deal with whitespaces
2021-06-06 16:33:35 +02:00
georg
49c8b14e59 KDE Dolphin: add German translation
Credits to @RandomGuy234
Closes #154
2021-06-01 18:47:28 +00:00
jvoisin
bf0c777cb9 Improve support for xlsx files 2021-05-20 18:16:28 +02:00
jvoisin
682552d152 Allow bubblewrap to fail for now 2021-05-05 21:04:29 +02:00
jvoisin
c9be50f968 Fix the CI in Fedora 2021-05-05 21:03:20 +02:00
jvoisin
2eec653e99 Please pylint 2021-04-24 17:28:44 +02:00
jvoisin
85c08c5b68 Add support for AIFF files
This should close #151
2021-04-24 17:26:38 +02:00
jvoisin
c5841a241d Bump the changelog 2021-03-19 17:54:21 +01:00
jvoisin
d00ca800b2 Keep sharedStrings.xml when processing MSOffice sheets 2021-03-14 14:41:40 +01:00
jvoisin
8b42b28b70 Don't keep [trash] files when processing MS Office files 2021-03-14 14:35:29 +01:00
jvoisin
e2362b8620 Improve epub support
Warn when there are encrypted fonts in an epub file
2021-03-07 17:50:25 +01:00
jvoisin
626669f95f Add some typing to epub.py 2021-03-07 17:50:17 +01:00
jvoisin
497f5f71fc Improve epub compatibility 2021-03-07 16:59:18 +01:00
jvoisin
cd5f2eb71c Add a missing comma
This should improve epub support
2021-03-07 16:42:38 +01:00
jvoisin
ec082d6483 Improve a bit the support of epub 2021-02-07 17:24:50 +01:00
jvoisin
f8111547ae Improve epub compatibility 2021-01-30 16:24:42 +01:00
jvoisin
88fa71fbde Bump the changelog 2020-12-18 17:55:41 +01:00
Romain Vigier
6cd28ed46c Add Metadata Cleaner link 2020-12-07 11:14:03 +01:00
jvoisin
92dcc8175d Add a mention of mat2-web 2020-11-30 22:02:07 +01:00
jvoisin
7131aa6fd7 Fix the link to the mailing list 2020-11-30 21:52:39 +01:00
jvoisin
7ce2b5121b Please pylint 2020-11-30 18:53:25 +01:00
jvoisin
a517f8d36e Please pylint 2020-11-30 18:52:07 +01:00
jvoisin
61dce89fbd Raise a ValueError explicitly 2020-11-30 18:52:07 +01:00
jvoisin
88b7ec2c48 Don't be silly 2020-11-23 19:55:35 +01:00
Holger Paradies
8bea98911e Fix dolphin integration 2020-11-21 15:31:13 +00:00
jvoisin
62ec8f6c1e Fix the CI on fedora 2020-11-13 17:30:47 +01:00
jvoisin
148bcbba52 Bump coverage 2020-11-13 17:27:23 +01:00
jvoisin
b3def8b5de Mount /etc/alternatives inside bubblewrap
This is now required by ffmpeg
2020-11-13 17:18:20 +01:00
jvoisin
77dde8a049 Please pylint 2020-11-13 12:09:25 +01:00
Romain Vigier
1b361ec27e Don't set a default value when retrieving Xmlns key for SVG metadata 2020-11-12 22:46:14 +01:00
jvoisin
58a1563a99 Better test of corrupted MSOffice files 2020-11-06 16:05:42 +01:00
jvoisin
f638168033 Better handling of malformed pdf 2020-11-06 16:05:24 +01:00
jvoisin
b84f73c5c3 Handle multiple namespaces in MSOffice's content types 2020-11-06 15:29:42 +01:00
jvoisin
96e639dfd3 Fix a regexp for xlsx files
This should increase a bit the compatibility with Excel files
2020-11-06 15:26:30 +01:00
jvoisin
46b3ae1672 Fix a crash affecting some mp3 files 2020-07-22 15:47:35 +02:00
jvoisin
d0bc79442b Add a small bla about donations 2020-06-30 22:03:14 +02:00
rhamnousia
17919c73a9 typo fixes in nautilus/mat2.py 2020-06-29 12:54:24 +00:00
rhamnousia
60d820b053 fixed a minor typo in the nautilus readme 2020-06-28 18:32:00 -04:00
jvoisin
461534a966 Add a list of supported formats in the README 2020-06-09 13:50:51 +02:00
jvoisin
d8b68ef68e Improve a bit Microsoft word support 2020-05-17 16:53:36 +02:00
jvoisin
c8dc020dc5 Improve xlsx support 2020-04-06 20:47:32 +02:00
jvoisin
599909a760 Improve xlsx support 2020-04-02 20:58:10 +02:00
jvoisin
d008b1e2f0 Bump the changelog 2020-03-29 13:21:55 +02:00
jvoisin
d7a03d907b Vastly improve ppt compatibility 2020-03-08 14:06:27 +01:00
jvoisin
a23dc001cd Improve compatibility with MS Office of cleaned ppt 2020-03-07 14:34:07 +01:00
jvoisin
f93df85d03 Improve a bit ppt support 2020-03-07 05:22:36 -08:00
jvoisin
e5b1068ed6 Improve a bit the support of ppt files 2020-03-07 12:49:45 +01:00
Antoine Tenart
843c0d8cc5 mat2: standardize the help messages format
This is a cosmetic patch only.

Signed-off-by: Antoine Tenart <antoine.tenart@ack.tf>
2020-02-27 10:25:29 +01:00
tguinot
56d2c4aa5f Add which pathfinding for executables 2020-02-11 17:23:11 +01:00
jvoisin
12f23e0150 Bump the changelog 2020-02-09 19:00:34 +01:00
jvoisin
72f41c5e05 Clarify a bit the manpage 2020-02-08 17:04:53 +01:00
jvoisin
5270071b94 Remove a couple of residual metadata in pdf
This commit takes care of removing residual metadata
added by mat2 during the cleaning of pdf.
2020-02-08 17:00:37 +01:00
jvoisin
5312603a88 Fix the testsuite 2020-02-08 15:21:13 +01:00
jvoisin
ebe06cb8a9 Add an example of possible quality loss in the manpage 2020-02-08 14:01:13 +01:00
jvoisin
6dd48de4ef Improve a bit the robustness of the testsuite 2020-02-05 21:53:57 +01:00
georg
e0f4f0e302 man: fix typo and improve wording 2020-01-06 11:22:10 +00:00
jvoisin
4acf3af002 Add a note about lightweight mode in the man 2020-01-05 20:32:38 +01:00
jvoisin
ee704db2ff Add support for wav files 2020-01-01 19:47:46 +01:00
jvoisin
693408f1a6 Please mypy
Mypy doesn't like some annotation in web.py,
this commits aims at pleasing it.
2019-12-29 15:20:48 +01:00
jvoisin
0902e9e330 Make the testsuite a bit more robust
Some terminals with a small number of columns
could wrap the cli's output in a way that
would make the testsuite fail.
This commit breaks the tests into several smaller ones
to mitigate this.

This issue was originally reported by eleius
[here](https://github.com/actionless/pikaur/issues/433), and forwarded as #153.
2019-12-23 23:10:39 +01:00
Ivy Fay
b2efffdaa4 sandbox: stop mounting new filesystem on /tmp
Mounting a new, empty filesystem on /tmp makes it impossible to use mat2 for manipulating files stored there. In particular, it breaks running tests while creating a package and using /tmp as a temporary builddir, which is a common setup in Arch Linux:
https://aur.archlinux.org/packages/mat2/#comment-721221
2019-12-18 02:23:43 -08:00
jvoisin
7465cedee7 Handle tiff images with a .tif extension 2019-12-16 14:55:35 -08:00
jvoisin
f5aef1b391 Improve the reliability of Exiftool-base parsers 2019-12-15 09:04:51 -08:00
jvoisin
2e3496d3d4 Improve the reliability of Gdk-based parsers 2019-12-15 07:05:53 -08:00
jvoisin
be24c681ff Improve the reliability of PNG parsing 2019-12-15 06:57:32 -08:00
jvoisin
efa525c102 Improve the robustness of the HTML parser 2019-12-15 06:50:54 -08:00
jvoisin
f67cd9d7dc Improve the robustness of the CSS parser 2019-12-15 06:44:21 -08:00
jvoisin
615997be38 Update the help section of the readme 2019-12-08 11:28:32 +01:00
jvoisin
4ba4b143e6 Add a note about metadata 2019-12-02 17:10:34 +01:00
Ivy Fay
8c7b23be90 .gitlab-ci.yml: make test command consistent across distros
This switches to use "python3 -m unittest discover -v" on every distro.
2019-12-02 14:45:32 +00:00
georg
db797e3a52 Mention KDE Dolphin service menu (and fix typo) 2019-12-01 12:28:55 +00:00
jvoisin
da182dc2f8 Bump the changelog 2019-11-30 18:31:07 +01:00
jvoisin
e4114af3b5 Improve a bit ppt support 2019-11-30 11:38:22 +01:00
jvoisin
d56f83bed1 Improve a bit odt handling 2019-11-30 10:25:24 +01:00
georg
697cb36b81 This is mat2, not MAT2
Closes #131
2019-11-30 01:14:41 -08:00
jvoisin
6e52661cfb Fix the testsuite on Python3.8
There is a bug in Python3.8 (https://bugs.python.org/issue38688)
triggering an infinite recursion when copying a tree
in a subfolder of the current one. We're working around it
by using a list instead of an iterator, so that Python
won't "discover" the target folder as part of the source files.

This should fix #130
2019-11-30 10:10:41 +01:00
mathilde
03f5129968 fix copyright attribution formatting 2019-11-27 23:10:38 +01:00
georg
deeee256cc CI: Use pylint, instead of pylint3
It seems, despite the name, both packages depend on Python 3. However,
pylint3 seems deprecated, and upstream recommends to install pylint:
https://www.pylint.org/#install

The current versions of both packages in Debian unstable are:
pylint  2.4.4-1
pylint3 2.2.2-1

This commit fixes failing CI jobs due to the use of pylint3 2.2.2-1,
which seems broken.
2019-11-26 23:16:49 +00:00
jvoisin
df1eb98a40 Please the new version of pylint 2019-11-26 22:12:56 +01:00
jvoisin
ada53cb9c6 Add an integration with Dolphin 2019-11-25 21:56:24 +01:00
jvoisin
655c19d17d Improve a bit the support for ppt files 2019-10-17 23:02:17 +02:00
jvoisin
a389cc760a Fix a stacktrace in ./mat2 when the file can't be cleaned 2019-10-17 22:51:00 +02:00
jvoisin
4034cf9a1a Copy file permissions
Mat2 (the cli) will now copy the input file permissions
to the output file.
2019-10-13 11:54:47 +02:00
jvoisin
5f0b3beb46 Add a way to disable the sandbox
Due to bubblewrap's pickiness, mat2 can now be run
without a sandbox, even if bubblewrap is installed.
2019-10-12 16:13:49 -07:00
jvoisin
3cef7fe7fc Refactor tests 2019-10-12 13:32:04 -07:00
jvoisin
6d19a20935 Remove an unused variable 2019-10-12 21:41:13 +02:00
jvoisin
12489bb682 Remove a useless \ 2019-10-12 21:36:28 +02:00
jvoisin
bb903ec309 Remove useless parenthesis 2019-10-12 21:36:19 +02:00
jvoisin
893faa6604 Fix a test for png's lightweight cleaning on corrupted files 2019-10-12 21:34:31 +02:00
jvoisin
4483c06f19 Replace abstractstaticmethod with abstractmethod
Apparently, abstractstaticmethod is deprecated
since python3.3.
2019-10-12 21:28:27 +02:00
madaidan
58773088ac Mount a new tmpfs on /tmp and drop all capabilities
This mounts a new tmpfs on /tmp so any files residing there would be hidden
from the sandbox. Many programs store some files in there that might be useful
to an attacker.  It also drops all capabilities in case it is ever run with
extra capabilities for whatever reason.
2019-10-05 15:21:40 +02:00
jvoisin
3714553185 Fix bubblewrap
On some machines (like mine), `/proc` has to be mounted.  Also, since
sandboxing with bubblewrap is best effort and assumes that an attacker doesn't
have control outside of the file to clean, it's safe to __try__ to enable some
bubblewrap features, and to silently fail otherwise.
2019-09-21 14:14:39 +02:00
jvoisin
1678d37856 Mark a comment as FP 2019-09-01 19:01:33 +02:00
jvoisin
397a18b0cc Add support for ppm 2019-09-01 09:28:46 -07:00
jvoisin
fc924239fe Add a test for nsid cleaning 2019-09-01 13:52:02 +02:00
jvoisin
0170f0e37e Improve a bit the comments in the code
This is related to the previous commit
2019-09-01 13:52:02 +02:00
jvoisin
0cf0541ad9 Remove nsid fields from MSOffice documents
nsids are random identifiers, usually used to ease merging
between documents, and can trivially be used for fingerprinting.
2019-09-01 13:52:02 +02:00
jvoisin
40669186c9 Add support for inplace cleaning 2019-08-31 10:31:08 -07:00
jvoisin
d76a6cbb18 Some arguments of mat2 are mutually exclusive 2019-08-01 08:14:21 -07:00
jvoisin
49e0c43ac5 Tweak a bit the ci
- gentoo and debian with bubblewrap are not allowed to fail anymore
- don't run coverage on debian without bubblewrap
2019-07-22 23:36:20 +02:00
jvoisin
0c75cd15dc Remove a mypy workaround to bump coverage back to 100% 2019-07-22 23:28:51 +02:00
jvoisin
5280b6c2b3 Add a test for svg namespace 2019-07-22 23:21:06 +02:00
georg
a81ea65d44 CI: Run bubblewrap tests as different user than 'root' to fix errors
It seems, there is a bug somewhere if the test suite is invoked as
'root', and bubblewrap is available.
2019-07-22 13:39:06 -07:00
georg
8bb2826f7a CI: Add job to run codespell, a spell checking software 2019-07-22 13:31:40 -07:00
jvoisin
5c33b290ae Fix mypy 2019-07-20 16:05:55 +02:00
jvoisin
00d728f6cc Display the filename along with the "No metadata found" message 2019-07-18 01:30:28 +02:00
georg
65cfd110f9 Nautilus: Add note that distribution packages ship the extension
Relates #106
2019-07-14 23:07:36 +00:00
georg
1f830bf8ad README: Drop note about Debian jessie, which is oldoldstable nowadays
As such, hopefully, it's not really used widely anymore. If so, this
note isn't really relevant.
2019-07-14 14:19:45 -07:00
georg
d027008e46 README: Add note about the user interfaces provided 2019-07-14 14:01:54 -07:00
georg
1163bdd991 README: Drop note about web disclosure to broaden the possible use cases 2019-07-14 19:22:33 +00:00
georg
1be0a4eefb INSTALL: Update Debian package status
Also, make the note generic, to omit the need to update it "constantly".

Closes #76
2019-07-13 14:29:55 -07:00
jvoisin
dc5603eb1d Please mypy 2019-07-13 23:25:44 +02:00
jvoisin
4999209f9c Add support for svg 2019-07-13 21:26:05 +02:00
jvoisin
bdd5581033 Compress cleaned zip archives by default 2019-07-13 15:04:43 +02:00
jvoisin
47f9cb33bf Please mypy 2019-07-13 15:03:40 +02:00
georg
b784a9fc7f doc/threat_model: this is about mat2, not mat 2019-07-10 14:36:47 +00:00
jvoisin
88b95923ab Parallelize the cli 2019-06-05 22:28:57 +02:00
jvoisin
13d71a2565 Document the archives handling implementation's details 2019-05-16 20:59:15 +02:00
jvoisin
35d550d229 Use memoization for the _*_path() functions
This shouldn't make a big difference in the CLI/extension
usage, but might improve the performances of long-running
instances, or people misusing the API.
2019-05-16 00:31:40 +02:00
jvoisin
aa52a5c91c Please mypy wrt. the last two commits 2019-05-14 00:50:17 +02:00
Antoine Tenart
f19f6ed8b6 Rework the dependency checks to distinguish required/optional ones
Rework the dependencies definition to include a 'required' flag, which
is passed by the check_dependencies helper to the callers, so that they
can distinguish between required and optional dependencies.

This helps in two ways:
- The unit test for the dependencies was now failing when an optional
  one was missing, due to a previous rework.
- Mat2's --check-dependencies was referring to "required dependencies"
  and was misleading for the user as some of them could be optional.

Signed-off-by: Antoine Tenart <antoine.tenart@ack.tf>
2019-05-13 23:35:26 +02:00
Antoine Tenart
51ab2db279 tests: libmat2: RuntimeError cannot be thrown by check_dependencies
Remove the try/except logic when calling check_dependencies, as it
cannot throw the exception anymore (it's caught already in the
function).

Signed-off-by: Antoine Tenart <antoine.tenart@ack.tf>
2019-05-13 23:35:06 +02:00
jvoisin
ef665e6dc1 Please pylint 2019-05-13 23:31:46 +02:00
jvoisin
aa0ff643c4 Improve a bit the debug mode 2019-05-13 22:12:00 +02:00
jvoisin
dd9ead4ebe Document how mat2 compares to other software 2019-05-11 00:19:17 +02:00
jvoisin
d0ab2c3023 Bump the changelog 2019-05-10 22:16:38 +02:00
jvoisin
fe1950ac3e Test the cli's behaviour with valid and invalid files
This should ensure that if we decide to implement
some threading in the cli, a faulty file
won't break everything.
2019-05-09 21:08:52 +02:00
jvoisin
97abafdc58 Minor code cleanup 2019-05-09 09:41:05 +02:00
jvoisin
f1a06e805b Fix an erroneous error message
This one was spotted by @fuzzy
2019-05-08 22:34:32 +02:00
jvoisin
4f0e0685ca Allow failure with bubblewrap for now 2019-05-08 21:36:29 +02:00
jvoisin
911d822c44 Add tests to find possible race-conditions in the cli 2019-05-08 21:30:54 +02:00
fuzzy
7e031c9757 typo 2019-05-03 02:39:15 -07:00
jvoisin
9516990693 Add some verification for "dangerous" tarfiles 2019-05-01 17:55:35 +02:00
jvoisin
a7ebb587e1 Handle weird permissions in tar archives 2019-04-27 22:48:40 +02:00
jvoisin
14a4cddb8b Improve the display of tarfile's members mtime 2019-04-27 21:15:06 +02:00
jvoisin
8e41b098d6 Add support for compressed tar files 2019-04-27 06:03:09 -07:00
jvoisin
82cc822a1d Add tar archive support 2019-04-27 04:05:36 -07:00
jvoisin
20ed5eb7d6 Improve a bit the verbosity of a test 2019-04-14 21:00:13 +02:00
jvoisin
05f429b197 Add support for xhtml files 2019-04-14 20:36:33 +02:00
jvoisin
74afa885f5 Please pylint 2019-03-30 10:39:39 +01:00
jvoisin
1e325c5b5b Please mypy
Apparently, mypy isn't able (yet?) to deal
with variables that are changing their types
at runtime.

Python is wonderful.
2019-03-30 10:33:16 +01:00
Antoine Tenart
6c7dc4fada README: update the usage description
Signed-off-by: Antoine Tenart <antoine.tenart@ack.tf>
2019-03-29 19:30:33 +01:00
Antoine Tenart
1c79aa951e README: remove one trailing space
Signed-off-by: Antoine Tenart <antoine.tenart@ack.tf>
2019-03-29 19:29:28 +01:00
Antoine Tenart
d454ef5b8e libmat2: fix dependency checks for cmd line utilities
The command line checks for command line utilities are done by trying to
access the executables and by throwing an exception when not found. This
leads to:
- The mat2 cmd line --check-dependencies option failing.
- The ffmpeg unit tests failing when ffmpeg isn't installed (even though
  it's an optional dependency).

This patch fixes it.

Signed-off-by: Antoine Tenart <antoine.tenart@ack.tf>
2019-03-29 19:29:28 +01:00
Antoine Tenart
c824a68dd8 libmat2: reshape the dependencies list
Invert the keys and values in DEPENDENCIES. It seems more natural to use
the key as a key in check_dependencies(), and the value as the value.
This also helps in preparing for reworking the check_dependencies()
helper.

Signed-off-by: Antoine Tenart <antoine.tenart@ack.tf>
2019-03-29 19:29:28 +01:00
Antoine Tenart
c8602b8c7e mat2: display the default choice of --unknown-members in the help
Signed-off-by: Antoine Tenart <antoine.tenart@ack.tf>
2019-03-26 18:16:03 +01:00
Antoine Tenart
b4b150a4f5 mat2: do not check the input file for W_OK
Signed-off-by: Antoine Tenart <antoine.tenart@ack.tf>
2019-03-26 18:11:21 +01:00
Antoine Tenart
51ff89c512 doc: remove one trailing space in the man page
Signed-off-by: Antoine Tenart <antoine.tenart@ack.tf>
2019-03-26 18:11:21 +01:00
jvoisin
b8c92fec09 Fix the testsuite 2019-03-23 00:41:23 +01:00
Antoine Tenart
2405df0469 mat2: fix typo in error message when a file does not exist
Signed-off-by: Antoine Tenart <antoine.tenart@ack.tf>
2019-03-23 00:33:18 +01:00
Antoine Tenart
0e3c2c9b1b libmat2: audio: not all id3 types have a text attribute
Not all id3 types have a text attribute (such as mutagen.id3.APIC or
mutagen.id3.UFID). This causes the get_meta helper to crash when
trying to access the text attribute of an object which does not have it.
Fixes it by checking the text attribute is available before accessing
it.

Signed-off-by: Antoine Tenart <antoine.tenart@ack.tf>
2019-03-23 00:32:44 +01:00
georg
2dc097baf3 CI: Add job to run tests on gentoo
Closes #93
2019-03-09 04:15:10 -08:00
georg
e40eb92b55 CI: Don't run 'pip3 install' the Docker images already does that 2019-03-07 21:53:43 +01:00
georg
a5a3e4677f CI: Drop obsolete tags to force jobs to run on whitewhale
This made sense some time ago. Nowadays, the whitewhale runner seems to
not really be reliable, so tagging jobs with it doesn't gain much.

Relates #45
2019-03-07 21:53:43 +01:00
georg
adf7adf854 CI: Don't install packages in the jobs, now handled via pre-built images
Relates #45
2019-03-07 21:53:43 +01:00
georg
2b4f2199e4 pylint: disable 'no-else-return' rule (R1705) 2019-03-07 21:53:43 +01:00
georg
1327089a30 Small fixes for newer pylint versions 2019-03-07 21:53:43 +01:00
georg
459ed07443 CI: Use own images to speed up jobs
Relates #45
Relates #93
2019-03-07 21:53:43 +01:00
georg
32ca58ef82 doing a release: this is about mat2, not mat
Additionally, fix a typo.
2019-03-07 12:00:58 +00:00
jvoisin
6b39edc3f2 Update a bit the release process 2019-03-06 22:03:46 +01:00
jvoisin
18570813c9 The gitlab mirror was useless, lets remove it 2019-03-06 22:03:31 +01:00
Brolf
5ac91cd4f9 Refactor {black,white}list into {block,allow}list
Closes #96
2019-03-05 23:13:42 +00:00
georg
c3f097a82b fix typo 2019-03-01 22:00:23 +00:00
jvoisin
cb8a016319 Bump the changelog 2019-02-28 00:13:28 +01:00
jvoisin
55214206b5 Improve the previous commit
- More tests
- More documentation
- Minor code cleanup
2019-02-27 23:53:07 +01:00
jvoisin
73d2966e8c Improve epub support 2019-02-27 23:04:38 +01:00
jvoisin
eb2e702f37 Document the previous commit 2019-02-25 15:37:44 +01:00
jvoisin
545dccc352 In archive-based formats, the mimetype file comes first
This should improve epub compatibility,
along with other formats as a side-effect
2019-02-24 23:32:32 +01:00
jvoisin
524bae5972 <title> is also an html metadata 2019-02-23 20:47:26 +01:00
jvoisin
c757a9b7ef Fix a bug in css cleaning
It's not mandatory to actually have a comment inside
comment delimiter, like `/**/`.
2019-02-23 20:21:11 +01:00
jvoisin
dda30c48b7 Fix the setup.py on Debian 2019-02-21 10:36:23 +01:00
jvoisin
8542e650ec Mention bubblewrap in the README 2019-02-21 01:44:01 +01:00
jvoisin
02ff21b158 Implement epub support 2019-02-20 16:28:11 -08:00
jvoisin
6b45064c78 Bump the changelog 2019-02-17 17:02:17 +01:00
jvoisin
a81b7658a8 Make the mandatory metadata warning generic
This should close #95.
2019-02-10 21:46:13 +01:00
jvoisin
6e63e03b86 Streamline a bit the previous commit 2019-02-09 15:23:16 +01:00
Poncho
a71488d459 bind mount /etc/ld.so.cache to the sandbox
without /etc/ld.so.cache available in the sandbox, tests fail on gentoo with:
/usr/bin/ffmpeg: error while loading shared libraries: libstdc++.so.6:
    cannot open shared object file: No such file or directory
2019-02-09 09:49:51 +01:00
jvoisin
6ef6aaa222 Improve a bit get_meta for libreoffice files 2019-02-08 23:23:56 +01:00
jvoisin
6cc034e81b Add support for html files 2019-02-08 23:05:18 +01:00
jvoisin
e1dd439fc8 Use of the archive refactoring for the office documents too 2019-02-07 22:19:37 +01:00
jvoisin
b9a62d798a Refactor a bit office get_meta handling
This should make easier to get more metadata from
archive-based file formats.
2019-02-04 00:31:26 +01:00
jvoisin
54e50450ad Fix the return code on parsers' list display 2019-02-03 21:09:12 +01:00
jvoisin
433609f8ea Implement .gif support 2019-02-03 21:01:58 +01:00
intrigeri
e8c1bb0e3c Whenever possible, use bwrap for subprocesses
This should close #90
2019-02-03 19:18:41 +01:00
jvoisin
8b5d0c286c Document how to get the coverage from the testsuite 2019-02-03 18:33:25 +01:00
jvoisin
8e84ba547a Add support for wmv 2019-02-02 19:19:36 +01:00
jvoisin
812bf2553b Rename the internal class used by the nautilus extension
This should solve collisions with people like me that
are copy/pasting the documentation, creating conflicts
with other extensions that are doing the very same thing.
2019-01-16 23:10:17 +01:00
Alan
94cdca1ed2 Update debian packaging status 2018-12-15 17:05:37 +01:00
Alan
b755aba8ea Fix debian build instructions 2018-12-15 17:05:32 +01:00
jvoisin
edce78859b Add a note in the readme about -L and pdf 2018-12-08 18:39:56 +01:00
jvoisin
0ab17b973b mat2 is now available on pypi 2018-11-11 20:49:24 +01:00
jvoisin
389311475c Add a readme for the nautilus extension 2018-11-11 19:58:51 +01:00
jvoisin
505be24be9 Bump the changelog 2018-11-10 12:46:31 +01:00
jvoisin
ef8265e86a Remove a useless image 2018-11-10 10:54:13 +01:00
jvoisin
1d75451b77 Add some type annotations to the nautilus extension 2018-11-08 21:40:33 +01:00
jvoisin
dc35ef56c8 Add a missing file :/ 2018-11-07 22:20:31 +01:00
jvoisin
3aa76cc58e Prove that the previous commit is working 2018-11-07 22:13:36 +01:00
jvoisin
8ff57c5803 Do not display control characters in output
Kudos to Sherry Taylor for reporting this issue ♥
2018-11-07 22:07:46 +01:00
jvoisin
04bb8c8ccf Add mp4 support 2018-10-28 07:41:04 -07:00
jvoisin
3a070b0ab7 Add support for zip files 2018-10-25 11:56:46 +02:00
jvoisin
283e5e5787 Improve archive-based parser's robustness against corrupted embedded files 2018-10-25 11:56:12 +02:00
jvoisin
513d897ea0 Implement get_meta() for archives 2018-10-25 11:29:50 +02:00
jvoisin
5a9dc388ad Minor refactorisation of how we're checking for exiftool's presence 2018-10-25 11:05:06 +02:00
jvoisin
5a08f5b7bf Add a test for tiff lightweight cleaning 2018-10-24 20:19:36 +02:00
jvoisin
fe885babee Implement lightweight cleaning for jpg 2018-10-24 19:35:07 +02:00
jvoisin
1040a594d6 Fix a stupid typo in the changelog 2018-10-23 17:13:53 +02:00
jvoisin
e510a225e3 Bump the changelog 2018-10-23 17:07:42 +02:00
jvoisin
a98962a0fa Document that FFmpeg is now an optional dependency 2018-10-23 16:57:18 +02:00
jvoisin
9a81b3adfd Improve type annotation coverage 2018-10-23 16:32:28 +02:00
jvoisin
f1a071d460 Implement lightweight cleaning for png and tiff 2018-10-23 16:22:11 +02:00
jvoisin
38df679a88 Optimize the handling of problematic files 2018-10-23 13:49:58 +02:00
jvoisin
44f267a596 Improve problematic filenames support 2018-10-22 16:56:05 +02:00
jvoisin
5bc88faedf Fix the testsuite on fedora 2018-10-22 13:55:09 +02:00
jvoisin
83389a63e9 Test mat2's reliability wrt. corrupted video files 2018-10-22 13:42:04 +02:00
jvoisin
e70ea811c9 Implement support for .avi files, via ffmpeg
- This commit introduces optional dependencies (namely ffmpeg):
  mat2 will spit a warning when trying to process an .avi file
  if ffmpeg isn't installed.
- Since metadata are obtained via exiftool, this commit
  also refactors a bit our exiftool wrapper.
2018-10-22 12:58:01 +02:00
jvoisin
2ae5d909c3 Make pyflakes happy 2018-10-18 21:22:28 +02:00
jvoisin
5896387ade Output metadata in a sorted fashion 2018-10-18 21:17:12 +02:00
jvoisin
d4c050a738 wtf python 2018-10-18 20:29:50 +02:00
jvoisin
f04d4b28fc Fix the tests on Debian? 2018-10-18 20:23:00 +02:00
jvoisin
da88d30689 Fix the CI on debian 2018-10-14 10:59:50 +02:00
Rémi Oudin
f1552b2ccb Make testsuite fail if coverage is under 100%
Fixes issue #61
2018-10-12 17:07:56 +02:00
jvoisin
2ba38dd2a1 Bump mypy typing coverage 2018-10-12 14:32:09 +02:00
jvoisin
b832a59414 Refactor lightweight mode implementation 2018-10-12 11:49:24 +02:00
Sébastien Helleu
6ce88b8b7f Fix typo in README 2018-10-11 21:40:58 +02:00
jvoisin
2444caccc0 Make pylint happier 2018-10-11 19:55:07 +02:00
jvoisin
b9dbd12ef9 Implement recursive metadata for FLAC files
Since FLAC files can contain covers, it makes sense
to parse their metadata
2018-10-11 19:52:47 +02:00
jvoisin
b2e153b69c Delete pictures of FLAC files 2018-10-11 18:15:11 +02:00
Simon Magnin
35dca4bf1c add recursivity for archive style files 2018-10-11 08:28:02 -07:00
jvoisin
4ed30b5e00 Add the mailing list announcement to the release process 2018-10-06 20:00:50 +02:00
jvoisin
0d25b18d26 Improve both the typing and the comments 2018-10-05 17:07:58 +02:00
jvoisin
d0f3534eff Hide unsupported extensions in mat2 -l 2018-10-05 12:43:21 +02:00
jvoisin
8675706c93 Improve the display of mat2 when no metadata are found
This should close #74
2018-10-05 12:35:35 +02:00
Poncho
5e196ecef8 Update logo
Use color palette and size according to
https://developer.gnome.org/hig/stable/icon-design.html.en
2018-10-05 11:13:31 +02:00
jvoisin
8e98593b02 Trash word/people.xml in office files 2018-10-04 16:28:20 +02:00
jvoisin
df252fd71a Remove a superfluous import 2018-10-04 16:19:38 +02:00
jvoisin
a1c39104fc Make the testsuite runnable on the installed MAT2 2018-10-04 16:16:52 +02:00
georg
34fbd633fd libmat2: fix shebang
Relates 0a2a398c9c
2018-10-03 18:38:28 +00:00
jvoisin
f1ceed13b5 Bump the changelog 2018-10-03 16:38:05 +02:00
jvoisin
5a5c642a46 Don't break office files for MS Office
We didn't take the whitelist into account while
removing dangling files from [Content_types].xml
2018-10-03 16:38:05 +02:00
jvoisin
84e302ac93 Remove file left behind by the testsuite 2018-10-03 16:38:05 +02:00
jvoisin
7901fdef2e Fix the testsuite 2018-10-03 15:29:46 +02:00
jvoisin
1b356b8c6f Improve mat2's cli reliability
- Replace some class members by instance members
- Don't thread the cleaning process anymore for now
2018-10-03 15:22:36 +02:00
jvoisin
c67bbafb2c Use [Content_Types].xml to improve MS Office coverage 2018-10-02 11:55:42 -07:00
georg
5b606f939d fix typo 2018-10-02 16:01:24 +00:00
jvoisin
156e81fb4c Check that cleaning twice doesn't break the file 2018-10-02 16:05:51 +02:00
jvoisin
9578e4b4ee Silence a bit the testsuite 2018-10-02 15:26:13 +02:00
jvoisin
a46a7eb6fa Update the CONTRIBUTING.md file wrt. the previous commit 2018-10-02 11:12:50 +02:00
georg
a24c59b208 manpage: this is about mat2, not mat 2018-10-01 21:26:59 +00:00
jvoisin
652b8e519f Files processed via MAT2 are now accepted without warnings by MS Office 2018-10-01 12:25:37 -07:00
jvoisin
c14be47f95 Fix a typo in the README spotted by @georg 2018-10-01 15:51:22 +02:00
jvoisin
81a3881aa4 Please mypy 2018-09-30 19:55:17 +02:00
jvoisin
e342671ead Remove dangling references in MS Office's [Content_types].xml 2018-09-30 19:53:18 +02:00
jvoisin
212d9c472c Document mat2's output scheme in the manpage as well 2018-09-26 00:13:44 +02:00
jvoisin
a88107c9ca Document the output scheme in the README 2018-09-26 00:11:16 +02:00
jvoisin
7f629ed2e3 Run the testsuite exclusively on Whitewhale for now
This should fix the intermittent failures, thanks
to @pollo for the tip
2018-09-25 17:09:04 +02:00
jvoisin
719cdf20fa Second pass of minor formatting 2018-09-24 20:15:07 +02:00
jvoisin
2e243355f5 Fix some minor formatting issues 2018-09-24 19:50:24 +02:00
jvoisin
174d4a0ac0 Implement rsid stripping for office files
MS Office XML rsid is a "unique identifier used to track the editing session
when the physical character representing this section mark was last formatted."

See the following links for details:
- https://msdn.microsoft.com/en-us/library/office/documentformat.openxml.wordprocessing.previoussectionproperties.rsidrpr.aspx
- https://blogs.msdn.microsoft.com/brian_jones/2006/12/11/whats-up-with-all-those-rsids/.
2018-09-24 18:03:59 +02:00
jvoisin
fbcf68c280 Lexicographical sort on xml attributes for office files
In XML, the order of the attributes shouldn't be meaningful,
however, MS Office sorts attributes for a given XML tag
differently than LibreOffice.
2018-09-24 17:45:09 +02:00
jvoisin
9826de3526 Add a test for zip ordering 2018-09-20 14:04:46 +02:00
jvoisin
ab71c29a28 Make pyflakes happy 2018-09-20 01:19:22 +02:00
jvoisin
3d2842802c Split the tests 2018-09-20 01:13:59 +02:00
jvoisin
a1a06d023e Insert archive members in lexicographic order 2018-09-18 22:44:21 +02:00
jvoisin
9275d64be5 Add a link to the gentoo overlay 2018-09-17 21:11:48 +02:00
Yoann Lamouroux
0a2a398c9c trivial modification of all shebang.
`/usr/bin/python3` -> `/usr/bin/env python3`

It's always better to trust the environment defined path to bin/python, as
virtualenv becomes the way to go.
2018-09-12 14:58:27 +02:00
jvoisin
5cf94bd256 Bump coverage back to 100% 2018-09-12 14:54:54 +02:00
jvoisin
de65f4f4d4 Improve the resilience of MAT2 wrt. corrupted PNG 2018-09-09 19:09:05 +02:00
jvoisin
759efa03ee Fix a setuptool-related warning 2018-09-06 11:42:07 +02:00
jvoisin
9fe6f1023b Make pylint happy 2018-09-06 11:36:04 +02:00
jvoisin
e3d817f57e Split office and archives 2018-09-06 11:34:14 +02:00
jvoisin
2e9adab86a Improve a cli test resilience 2018-09-06 11:32:29 +02:00
jvoisin
c8c27dcf38 Mention "scrambled exif" as a related software 2018-09-06 11:20:08 +02:00
jvoisin
120b204988 Change a bit the previous commit 2018-09-06 11:13:11 +02:00
Daniel Kahn Gillmor
f3cef319b9 Unknown Members: make policy use an Enum
Closes #60

Note: this changeset also ensures that clean.cleaned.docx is cleaned
up after the pytest is over.
2018-09-05 18:59:33 -04:00
Daniel Kahn Gillmor
2d9ba81a84 spelling correction.
while mat2 has both a thread model (a thread pool that strips metadata
in parallel) and a threat model (a list of malicious adversaries and
their capabilities that we are trying to defeat), i think this
paragraph is talking about the latter.
2018-09-05 13:00:28 -04:00
jvoisin
072ee1814d Remove defusedxml support and document why 2018-09-05 18:41:08 +02:00
jvoisin
3649c0ccaf Remove short version of dangerous/advanced options 2018-09-05 17:48:14 +02:00
Christian
119085f28d Add missing dependencies for the Nautilus extension to INSTALL.md 2018-09-05 17:42:39 +02:00
Christian
e515d907d7 Make sure target directory exists, assume MAT2 is in parent directory 2018-09-05 17:42:13 +02:00
jvoisin
46bb1b83ea Improve the previous commit 2018-09-05 17:26:09 +02:00
Daniel Kahn Gillmor
1d7e374e5b office: try all members, even when one fails
the end result will be the same -- an abort -- but the user will get
to see all the warnings for a particular file, instead of getting them
one at a time.
2018-09-04 18:28:04 -04:00
Daniel Kahn Gillmor
915dc634c4 document all unknown/unhandlable files even on abort
This makes it easy to get a list of all files that mat2 doesn't know
how to handle, without having to choose -u keep or -u omit.
2018-09-04 18:28:04 -04:00
Daniel Kahn Gillmor
10d60bd398 add --unknown-members argument to mat2
This allows the user to make use of parser.unknown_member_policy for
archive formats.

At the suggestion of @jvoisin, it also prints a scary warning if the
user explicitly chooses 'keep'.
2018-09-04 18:28:04 -04:00
Daniel Kahn Gillmor
4192a2daa3 office: create policy for what to do about unknown members
previously, encountering an unknown member meant that any parser of
this type would abort.

now, the user can set parser.unknown_member_policy to either 'omit' or
'keep' if they don't want the current action of 'abort'

note that this causes pylint to complain about branching depth for
remove_all() because of the nuanced error-handling.  I've disabled
this check.
2018-09-04 16:13:33 -04:00
jvoisin
9ce458cb3b Update the release process to create signed tarballs 2018-09-03 14:28:00 +02:00
jvoisin
907fc591cc Bump the coverage back to 100% 2018-09-01 16:58:34 +02:00
jvoisin
8255293d1d Add a link to the mailing list 2018-09-01 16:45:20 +02:00
jvoisin
6b7e8ad8c0 Add a .mailmap file 2018-09-01 16:12:03 +02:00
jvoisin
b7a8622682 Bump the changelog 2018-09-01 16:00:41 +02:00
Daniel Kahn Gillmor
3e2890eb9e three minor spelling fixes 2018-09-01 06:47:22 -07:00
jvoisin
91e80527fc Add archlinux to the CI 2018-09-01 15:41:22 +02:00
jvoisin
7877ba0da5 Fix a minor formatting issue 2018-09-01 14:16:55 +02:00
dkg
e2634f7a50 Logging cleanup 2018-09-01 05:14:32 -07:00
jvoisin
aba9b72d2c Fix some leftovers from the previous commit 2018-08-26 01:10:48 +02:00
Antoine Tenart
15dd3d84ff nautilus: rename the nautilus plugin
Rename the Nautilus plugin (removing 'nautilus' from the file name) as
it already lives in its own 'nautilus' directory. The same argument
applies when installing the plugin in a distro.

Signed-off-by: Antoine Tenart <antoine.tenart@ack.tf>
2018-08-26 01:09:41 +02:00
Antoine Tenart
588466f4a8 INSTALL: add instructions for the Fedora copr
Signed-off-by: Antoine Tenart <antoine.tenart@ack.tf>
2018-08-24 18:47:39 +02:00
Antoine Tenart
cf89ff45c2 gitignore: exclude all hidden files from being committed
Signed-off-by: Antoine Tenart <antoine.tenart@ack.tf>
2018-08-24 09:14:05 +02:00
Antoine Tenart
f583d12564 nautilus: remove swp file
A .swp file was committed by mistake. Remove it.

Signed-off-by: Antoine Tenart <antoine.tenart@ack.tf>
2018-08-24 09:09:49 +02:00
jvoisin
1c72448e58 Improve the detection of unsupported extensions in uppercase 2018-08-23 21:28:37 +02:00
Antoine Tenart
f068621628 libmat2: images: fix handling of .JPG files
Pixbuf only supports .jpeg files, not .jpg, so libmat2 looks for such an
extension and converts it if necessary. As this check is case sensitive,
processing .JPG files does not work.

Fixes #47.

Signed-off-by: Antoine Tenart <antoine.tenart@ack.tf>
2018-08-23 20:43:27 +02:00
jvoisin
fe09d81ab1 Don't forget to tell the downstreams about new releases 2018-08-19 15:51:44 +02:00
jvoisin
5be66dbe91 Mention the Arch linux's AUR package of MAT2 2018-08-19 15:51:23 +02:00
jvoisin
ee496cfa7f Fix a typo spotted by @Francois_B 2018-08-19 15:51:09 +02:00
jvoisin
6e2e411a2a Add an INSTALL.md file 2018-08-08 20:45:09 +02:00
jvoisin
2ce1dc793e Bump the changelog 2018-08-03 22:20:24 +02:00
jvoisin
e27768824a Change mat2's logo 2018-08-03 21:45:41 +02:00
jvoisin
36c5bad140 Improve our .gitignore 2018-07-30 23:00:33 +02:00
jvoisin
b5a9520a60 Add a cli-related test 2018-07-30 22:54:41 +02:00
jvoisin
a1257c538b Add some tests about pathological files 2018-07-30 22:36:36 +02:00
Antoine Tenart
6d8e999f12 Rename image to icon in the Nautilus extension
Signed-off-by: Antoine Tenart <antoine.tenart@ack.tf>
2018-07-26 09:01:27 +02:00
Antoine Tenart
1bc4c7aac9 Switch columns in the Nautilus extension
Signed-off-by: Antoine Tenart <antoine.tenart@ack.tf>
2018-07-26 09:01:01 +02:00
Antoine Tenart
03245a8731 Rename the Nautilus path column to file
Signed-off-by: Antoine Tenart <antoine.tenart@ack.tf>
2018-07-26 08:57:33 +02:00
Antoine Tenart
27445e9134 Rename the Nautilus exit button to close
Signed-off-by: Antoine Tenart <antoine.tenart@ack.tf>
2018-07-26 08:57:09 +02:00
jvoisin
b32ba9f736 Improve a bit nautilus' popup 2018-07-25 22:48:05 +02:00
jvoisin
e9f28edf73 Add a man page and document how to keep it up to date 2018-07-24 22:34:33 +02:00
jvoisin
7697f9c085 Improve the linters' coverage 2018-07-23 23:55:45 +02:00
jvoisin
e920083559 The Nautilus extension is now working 2018-07-23 23:39:06 +02:00
georg
71b1ced842 AbstractParser: Fix typos 2018-07-21 00:46:48 +00:00
jvoisin
942859601d Improve the code's documentation 2018-07-19 23:10:27 +02:00
jvoisin
565cb66d14 Minor simplification in how we're handling xml for office files 2018-07-19 22:55:08 +02:00
jvoisin
052a356750 Implement a much better Nautilus extension thanks to @atenart
Co-authored-by: Antoine Tenart <antoine.tenart@ack.tf>
Co-authored-by: jvoisin <julien.voisin@dustri.org>
2018-07-19 00:11:30 +02:00
jvoisin
2f670651cf Minor cleanup of the Nautilus extension's code 2018-07-18 23:20:51 +02:00
jvoisin
0cd510938a Minor code simplification 2018-07-18 23:15:47 +02:00
jvoisin
dc026f99ad Show if files are supported or not in the Nautilus extension 2018-07-18 23:12:55 +02:00
jvoisin
0aac0d644d Show a pretty icon for files in the Nautilus extension 2018-07-18 22:53:56 +02:00
jvoisin
17e69b6005 Change a button in the nautilus extension 2018-07-18 22:39:18 +02:00
jvoisin
cf5f3b268d Add a separator for the Nautilus extension 2018-07-18 22:39:10 +02:00
jvoisin
a5eede9a21 Remove the disclaimer from the Nautilus extension 2018-07-18 22:38:42 +02:00
Antoine Tenart
926e8dac5f nautilus: first working version
Improve the nautilus extension to get to a first working version:
- Single and multiple selections are working.
- The menu item only is there if mat2 has a chance to work on the
  selected files.
- Errors are reported using notifications.

Signed-off-by: Antoine Tenart <antoine.tenart@ack.tf>
2018-07-18 22:38:05 +02:00
georg
edc5f86552 README: Fix typo 2018-07-16 15:09:22 +00:00
jvoisin
84d50f97c0 Add a check for a missed dependency in ./mat2 -c 2018-07-15 17:00:01 +02:00
jvoisin
8093dce88e Bump the changelog 2018-07-10 21:41:24 +02:00
jvoisin
5a7c7f35f7 Remove print from libmat, and use the logging module instead
This should close #28
2018-07-10 21:30:38 +02:00
jvoisin
d5861e4653 Implement a check for dependencies in mat2
Example use:

```
$ mat2 -c
Dependencies required for MAT2 0.1.3:
- Cairo: yes
- Exiftool: yes
- GdkPixbuf from PyGobject: yes
- Mutagen: yes
- Poppler from PyGobject: yes
- PyGobject: yes
```

This should close #35
2018-07-10 21:24:26 +02:00
jvoisin
22e3918f67 Add pylint3 to the ci 2018-07-09 01:22:08 +02:00
jvoisin
080d6769ca Make pylint even happier 2018-07-09 01:11:44 +02:00
jvoisin
86fe3aa584 Fix the previous commit 2018-07-09 00:30:16 +02:00
jvoisin
cc327b1592 Minor improvement of fedora's duration in the testsuite 2018-07-09 00:27:40 +02:00
jvoisin
b4edd6d2a2 Document that MAT2 not being able to detect metadata doesn't mean that the file is clean 2018-07-09 00:17:59 +02:00
jvoisin
bd357b85f8 Remove a useless option that was never implemented anyway 2018-07-09 00:13:16 +02:00
jvoisin
8c21006e6c Fix some pep8 issues spotted by pyflakes 2018-07-08 22:40:36 +02:00
jvoisin
f49aa5cab7 Achieve 100% coverage! 2018-07-08 22:27:37 +02:00
jvoisin
52a2c800b7 Bump coverage again 2018-07-08 21:50:52 +02:00
jvoisin
ad3e7ccee8 Bump coverage for office files and fix some related crashes 2018-07-08 21:35:45 +02:00
jvoisin
ca01484126 Silence a mypy's stupid warning 2018-07-08 17:12:17 +02:00
jvoisin
f9bc022c96 Add defusedxml as an (optional) way to prevent XML-based attacks
Those attacks are DoS-only.
2018-07-08 17:07:26 +02:00
jvoisin
72e1fda18d Remove a leftover print 2018-07-08 15:19:18 +02:00
jvoisin
3cd4f9111f Bump coverage for torrent handling 2018-07-08 15:13:03 +02:00
jvoisin
b5fcddd6a6 Simplify how torrent files are handled
- Rework the testsuite wrt. torrent
- fail at parser's instantiation on corrupted torrent,
  instead of during `get_meta` or `remove_all` call
2018-07-08 13:49:11 +02:00
jvoisin
7ea362d908 Bump the coverage for pdf 2018-07-07 18:12:33 +02:00
jvoisin
85455a4419 Fix a mistake in office file revisions handling 2018-07-07 18:05:54 +02:00
jvoisin
9f631a1bb1 Bump a bit the coverage 2018-07-07 18:02:53 +02:00
70 changed files with 6622 additions and 998 deletions

45
.github/workflows/builds.yaml vendored Normal file
View File

@@ -0,0 +1,45 @@
name: CI for Python versions
on:
pull_request:
push:
schedule:
- cron: '0 16 * * 5'
jobs:
linting:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v5
- uses: actions/setup-python@v5
- run: pip install ruff
- run: |
ruff check .
build:
needs: linting
runs-on: ubuntu-latest
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14.0-rc.2"]
steps:
- uses: actions/checkout@v5
- name: Setup Python
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
sudo apt-get install --no-install-recommends --no-install-suggests --yes \
ffmpeg \
gir1.2-gdkpixbuf-2.0 \
gir1.2-poppler-0.18 \
gir1.2-rsvg-2.0 \
libimage-exiftool-perl \
python3-gi-cairo \
libcairo2-dev \
libgirepository-2.0-dev \
libgirepository1.0-dev \
gobject-introspection \
python3-mutagen
pip install .
- name: Build and run the testsuite
run: python3 -m unittest discover -v

4
.gitignore vendored
View File

@@ -1,5 +1,9 @@
.*
*.pyc
.coverage
.eggs
.mypy_cache/
build
dist
mat2.egg-info
tags

View File

@@ -1,46 +0,0 @@
image: debian
stages:
- linting
- test
bandit:
stage: linting
script: # TODO: remove B405 and B314
- apt-get -qqy update
- apt-get -qqy install --no-install-recommends python3-bandit
- bandit -r ./libmat2 --format txt --skip B101,B404,B603,B405,B314
pyflakes:
stage: linting
script:
- apt-get -qqy update
- apt-get -qqy install --no-install-recommends pyflakes3
- pyflakes3 ./libmat2 ./mat2 ./tests/
mypy:
stage: linting
script:
- apt-get -qqy update
- apt-get -qqy install --no-install-recommends python3-pip
- pip3 install mypy
- mypy mat2 libmat2/*.py --ignore-missing-imports
tests:debian:
stage: test
script:
- apt-get -qqy update
- apt-get -qqy install --no-install-recommends python3-mutagen python3-gi-cairo gir1.2-poppler-0.18 gir1.2-gdkpixbuf-2.0 libimage-exiftool-perl python3-coverage
- python3-coverage run --branch -m unittest discover -s tests/
- python3-coverage report -m --include 'libmat2/*'
tests:fedora:
image: fedora
stage: test
script:
- dnf install -y python3 python3-mutagen python3-gobject
- dnf install -y gdk-pixbuf2 poppler-glib gdk-pixbuf2 gdk-pixbuf2-modules
- dnf install -y cairo-gobject cairo python3-cairo
- dnf install -y perl-Image-ExifTool mailcap
- gdk-pixbuf-query-loaders-64 > /usr/lib64/gdk-pixbuf-2.0/2.10.0/loaders.cache
- python3 setup.py test

5
.mailmap Normal file
View File

@@ -0,0 +1,5 @@
Julien (jvoisin) Voisin <julien.voisin+mat2@dustri.org> totallylegit <totallylegit@dustri.org>
Julien (jvoisin) Voisin <julien.voisin+mat2@dustri.org> jvoisin <julien.voisin@dustri.org>
Julien (jvoisin) Voisin <julien.voisin+mat2@dustri.org> jvoisin <jvoisin@riseup.net>
Daniel Kahn Gillmor <dkg@fifthhorseman.net> dkg <dkg@fifthhorseman.net>

View File

@@ -1,8 +1,211 @@
# 0.13.5 - 2025-01-09
- Keep orientation metadata on jpeg and tiff files
- Improve cairo-related error/exceptions handling
- Improve the logging
- Improve the sandboxing
- Improve Python3.12 support
- Improve MSOffice documents handling
# 0.13.4 - 2023-08-02
- Add documentation about mat2 on OSX
- Make use of python3.7 constructs to simplify code
- Use more modern type annotations
- Harden get_meta in archive.py against variants of CVE-2022-35410
- Improve MSOffice document support
- Package the manpage on pypi
# 0.13.3 - 2023-02-23
- Fix a decorator argument
# 0.13.2 - 2023-01-28
- Fix a crash on some python versions
# 0.13.1 - 2023-01-07
- Improve xlsx support
- Remove the Nautilus extension
# 0.13.0 - 2022-07-06
- Fix an arbitrary file read (CVE-2022-35410)
- Add support for heic files
# 0.12.4 - 2022-04-30
- Fix possible errors/crashes when processing multiple files
via the command line interface
- Use a fixed PDF version for the output
- Improve compatibility with modern versions of rsvg
- Improve the robustness of the command line interface with
regard to control characters
# 0.12.3 - 2022-01-06
- Implement code for internationalization
- Keep individual files compression type in zip files
- Increase the robustness of mat2 against weird/corrupted files
- Fix the dolphin integration
- Add a fuzzer
# 0.12.2 - 2021-08-29
- Add support for aiff files
- Improve MS Office support
- Improve compatibility with newer/older version of mat2's dependencies
- Fix possible issues with the resolution of processed pdf
# 0.12.1 - 2021-03-19
- Improve epub support
- Improve MS Office support
# 0.12.0 - 2020-12-18
- Improve significantly MS Office formats support
- Fix some typos in the Nautilus extension
- Improve reliability of the mp3, pdf and svg parsers
- Improve compatibility with ffmpeg when sandboxing is used
- Improve the dolphin extension usability
- libmat2 now raises a ValueError on malformed files while trying to
find the right parser, instead of returning None
# 0.11.0 - 2020-03-29
- Improve significantly MS Office formats support
- Refactor how mat2 looks for executables
# 0.10.1 - 2020-02-09
- Improve the documentation and the manpage
- Improve the robustness of css, html, png, gdk-based, exiftool-based parsers
- Future-proof a bit the testsuite
- Handle tiff files with a .tif extension
- Improve the sandbox' usability
- Add support for wav files
# 0.10.0 - 2019-11-30
- Make mat2 work on Python3.8
- Minor improvement of ppt handling
- Minor improvement of odt handling
- Add an integration for KDE's file manager: Dolphin
- mat2 now copies file permissions on the cleaned files
- Add a flag to disable sandboxing
- Tighten a bit the sandboxing
- Improve handling of MSOffice documents
- Add support for inplace cleaning
- Better handling of mutually-exclusive arguments in the command line
- Add support for svg
- Add support for ppm
- Cleaned zip files are compressed by default
- Minor performances improvement when dealing with images/video files
- Better handling of optional dependencies
# 0.9.0 - 2019-05-10
- Add tar/tar.gz/tar.bz2/tar.xz archives support
- Add support for xhtml files
- Improve handling of read-only files
- Improve a bit the command line's documentation
- Fix a confusing error message
- Add even more tests
- Usual internal cleanups/refactorings
# 0.8.0 - 2019-02-28
- Add support for epub files
- Fix the setup.py file crashing on non-utf8 platforms
- Improve css support
- Improve html support
# 0.7.0 - 2019-02-17
- Add support for wmv files
- Add support for gif files
- Add support for html files
- Sandbox external processes via bubblewrap
- Simplify archive-based formats processing
- The Nautilus extension now plays nicer with other extensions
# 0.6.0 - 2018-11-10
- Add lightweight cleaning for jpeg
- Add support for zip files
- Add support for mp4 files
- Improve metadata extraction for archives
- Improve robustness against corrupted embedded files
- Fix a possible security issue on some terminals (control character
injection via --show)
- Various internal cleanup/improvements
# 0.5.0 - 2018-10-23
- Video (.avi files for now) support, via FFmpeg, optionally
- Lightweight cleaning for png and tiff files
- Processing files starting with a dash is now quicker
- Metadata are now displayed sorted
- Recursive metadata support for FLAC files
- Unsupported extensions aren't displayed in `./mat2 -l` anymore
- Improve the display when no metadata are found
- Update the logo according to the GNOME guidelines
- The testsuite is now runnable on the installed version of mat2
- Various internal cleanup/improvements
# 0.4.0 - 2018-10-03
- There is now a policy, for advanced users, to deal with unknown embedded fileformats
- Improve the documentation
- Various minor refactoring
- Improve how corrupted PNG are handled
- Dangerous/advanced cli's options no longer have short versions
- Significant improvements to office files anonymisation
- Archive members are sorted lexicographically
- XML attributes are sorted lexicographically too
- RSID are now stripped
- Dangling references in [Content_types].xml are now removed
- Significant improvements to office files support
- Anonymised office files can now be opened by MS Office without warnings
- The CLI isn't threaded anymore, for it was causing issues
- Various misc typo fixes
# 0.3.1 - 2018-09-01
- Document how to install mat2 for various distributions
- Fix various typos in the documentation/comments
- Add ArchLinux to the CI to ensure that mat2 is running on it
- Fix the handling of files with a name ending in `.JPG`
- Improve the detection of unsupported extensions in upper-case
- Streamline mat2's logging
# 0.3.0 - 2018-08-03
- Add a check for missing dependencies
- Add Nautilus extension
- Minor code simplifications
- Improve our linters' coverage
- Add a manpage
- Add folder/multiple files related tests
- Change the logo
# 0.2.0 - 2018-07-10
- Fix various crashes due to malformed files
- Simplify various code-paths
- Remove superfluous debug message
- Remove the `--check` option that never was implemented anyway
- Add a `-c` option to check for mat2's dependencies
# 0.1.3 - 2018-07-06
- Improve MAT2 resilience against corrupted images
- Improve mat2 resilience against corrupted images
- Check that the minimal version of Poppler is available
- Simplify how MAT2 deals with office files
- Simplify how mat2 deals with office files
- Improve cleaning of office files
- Thumbnails are removed
- Revisions are removed
@@ -14,8 +217,8 @@
- Rename some files to ease the packaging
- Add linters to the CI (mypy, bandit and pyflakes)
- Prevent exiftool-related parameter injections
- Improve MAT2's resilience against corrupted files
- Make MAT2 work on fedora, thanks to @atenart
- Improve mat2's resilience against corrupted files
- Make mat2 work on fedora, thanks to @atenart
- Tighten the threat model
- Simplify and improve how office files are handled

View File

@@ -1,11 +1,17 @@
# Contributing to MAT2
# Contributing to mat2
The main repository for MAT2 is on [0xacab]( https://0xacab.org/jvoisin/mat2 ),
with a mirror on [gitlab.com]( https://gitlab.com/jvoisin/mat2 ).
The main repository for mat2 is on [github]( https://github.com/jvoisin/mat2 ),
but you can send patches to jvoisin by [email](https://dustri.org/) if you prefer.
Do feel free to pick up [an issue]( https://0xacab.org/jvoisin/mat2/issues )
and to send a pull-request. Please do check that everything is fine by running the
testsuite with `python3 -m unittest discover -v` before submitting one :)
Do feel free to pick up [an issue]( https://github.com/jvoisin/mat2/issues )
and to send a pull-request.
Before sending the pull-request, please do check that everything is fine by
running the full test suite in GitLab. To do that, after forking mat2 in GitLab,
you need to go in Settings -> CI/CD -> Runner and there enable shared runners.
Mat2 also has unit tests (that are also run in the full test suite). You can run
them with `python3 -m unittest discover -v`.
If you're fixing a bug or adding a new feature, please add tests accordingly,
this will greatly improve the odds of your merge-request getting merged.
@@ -16,14 +22,24 @@ If you're adding a new fileformat, please add tests for:
2. Cleaning metadata
3. Raising `ValueError` upon a corrupted file
Since MAT2 is written in Python3, please conform as much as possible to the
Since mat2 is written in Python3, please conform as much as possible to the
[pep8]( https://pep8.org/ ) style; except where it makes no sense of course.
# Doing a release
1. Update the [changelog](https://0xacab.org/jvoisin/mat2/blob/master/CHANGELOG.md)
2. Update the version in the [mat2](https://0xacab.org/jvoisin/mat2/blob/master/mat2) file
3. Update the version in the [setup.py](https://0xacab.org/jvoisin/mat2/blob/master/setup.py) file
4. Commit the changelog, mat2 and setup.py files
5. Create a tag with `git tag -s $VERSION`
6. Push the tag with `git push --tags`
1. Update the [changelog](https://github.com/jvoisin/mat2/blob/master/CHANGELOG.md)
2. Update the version in the [mat2](https://github.com/jvoisin/mat2/blob/master/mat2) file
3. Update the version in the [setup.py](https://github.com/jvoisin/mat2/blob/master/setup.py) file
4. Update the version in the [pyproject.toml](https://github.com/jvoisin/mat2/blob/master/pyproject.toml) file
5. Update the version and date in the [man page](https://github.com/jvoisin/mat2/blob/master/doc/mat2.1)
6. Commit the modified files
7. Create a tag with `git tag -s $VERSION`
8. Push the commit with `git push origin master`
9. Push the tag with `git push --tags`
10. Download the gitlab archive of the release
11. Diff it against the local copy
12. If there is no difference, sign the archive with `gpg --armor --detach-sign mat2-$VERSION.tar.xz`
13. Upload the signature on GitHub's [tag page](https://github.com/jvoisin/mat2/tags) and add the changelog there
14. Announce the release on the [mailing list](https://mailman.boum.org/listinfo/mat-dev)
15. Sign'n'upload the new version on pypi with `python3 setup.py sdist bdist_wheel` then `twine upload -s dist/*`
16. Do the secret release dance

70
INSTALL.md Normal file
View File

@@ -0,0 +1,70 @@
# Python ecosystem
If you feel like running arbitrary code downloaded over the
internet (pypi doesn't support gpg signatures [anymore](https://github.com/pypa/python-packaging-user-guide/pull/466)),
mat2 is [available on pypi](https://pypi.org/project/mat2/), and can be
installed like this:
```
pip3 install mat2
```
# GNU/Linux
## Optional dependencies
When [bubblewrap](https://github.com/projectatomic/bubblewrap) is
installed, mat2 uses it to sandbox any external processes it invokes.
## Arch Linux
Thanks to [kpcyrd](https://archlinux.org/packages/?maintainer=kpcyrd), there is a package available on
[Arch linux's AUR](https://archlinux.org/packages/extra/any/mat2/).
## Debian
There is a package available in [Debian](https://packages.debian.org/search?keywords=mat2&searchon=names&section=all) and you can install mat2 with:
```
apt install mat2
```
## Fedora
Thanks to [atenart](https://ack.tf/), there is a package available on
[Fedora's copr]( https://copr.fedorainfracloud.org/coprs/atenart/mat2/ ).
First you need to enable mat2's copr:
```
dnf -y copr enable atenart/mat2
```
Then you can install mat2:
```
dnf -y install mat2
```
## Gentoo
mat2 is available in the [torbrowser overlay](https://github.com/MeisterP/torbrowser-overlay).
# OSX
## Homebrew
mat2 is [available on homebrew](https://formulae.brew.sh/formula/mat2):
```
brew install mat2
```
## MacPorts
mat2 is [available on MacPorts](https://ports.macports.org/port/mat2/):
```
port install mat2
```

107
README.md
View File

@@ -1,106 +1 @@
```
_____ _____ _____ ___
| | _ |_ _|_ | Keep you data,
| | | | | | | | _| trash your meta!
|_|_|_|__|__| |_| |___|
```
This software is currently in **beta**, please don't use it for anything
critical.
# Metadata and privacy
Metadata consist of information that characterizes data.
Metadata are used to provide documentation for data products.
In essence, metadata answer who, what, when, where, why, and how about
every facet of the data that are being documented.
Metadata within a file can tell a lot about you.
Cameras record data about when a picture was taken and what
camera was used. Office documents like PDF or Office automatically add
author and company information to documents and spreadsheets.
Maybe you don't want to disclose that information on the web.
This is precisely the job of MAT2: getting rid, as much as possible, of
metadata.
# Requirements
- `python3-mutagen` for audio support
- `python3-gi-cairo` and `gir1.2-poppler-0.18` for PDF support
- `gir1.2-gdkpixbuf-2.0` for images support
- `libimage-exiftool-perl` for everything else
Please note that MAT2 requires at least Python3.5, meaning that it
doesn't run on [Debian Jessie](https://packages.debian.org/jessie/python3).
# Running the test suite
```bash
$ python3 -m unittest discover -v
```
# How to use MAT2
```bash
usage: mat2 [-h] [-v] [-l] [-c | -s | -L] [files [files ...]]
Metadata anonymisation toolkit 2
positional arguments:
files
optional arguments:
-h, --help show this help message and exit
-v, --version show program's version number and exit
-l, --list list all supported fileformats
-c, --check check if a file is free of harmful metadatas
-s, --show list all the harmful metadata of a file without removing
them
-L, --lightweight remove SOME metadata
```
# Related software
- The first iteration of [MAT](http://mat.boum.org)
- [Exiftool](https://sno.phy.queensu.ca/~phil/exiftool/mat)
- [pdf-redact-tools](https://github.com/firstlookmedia/pdf-redact-tools), that
tries to deal with *printer dots* too.
- [pdfparanoia](https://github.com/kanzure/pdfparanoia), that removes
watermarks from PDF.
# Contact
If possible, use the [issues system](https://0xacab.org/jvoisin/mat2/issues).
If you think that a more private contact is needed (eg. for reporting security issues),
you can email Julien (jvoisin) Voisin at `julien.voisin+mat@dustri.org`,
using the gpg key `9FCDEE9E1A381F311EA62A7404D041E8171901CC`.
# License
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Copyright 2018 Julien (jvoisin) Voisin <julien.voisin+mat2@dustri.org>
# Thanks
MAT2 wouldn't exist without:
- the [Google Summer of Code](https://summerofcode.withgoogle.com/);
- the fine people from [Tails]( https://tails.boum.org);
- friends
Many thanks to them!
# This repository is deprecated, please use https://github.com/jvoisin/mat2 instead

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.1 KiB

After

Width:  |  Height:  |  Size: 28 KiB

View File

@@ -1,27 +1,630 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg xmlns="http://www.w3.org/2000/svg" version="1.0">
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
inkscape:export-ydpi="384"
inkscape:export-xdpi="384"
inkscape:export-filename="mat2.png"
width="128"
height="128"
id="svg11300"
sodipodi:version="0.32"
inkscape:version="0.92.2 2405546, 2018-03-11"
sodipodi:docname="mat2.svg"
inkscape:output_extension="org.inkscape.output.svg.inkscape"
version="1.0"
style="display:inline;enable-background:new"
viewBox="0 0 128 128">
<title
id="title4162">Adwaita Icon Template</title>
<defs
id="defs3" />
<sodipodi:namedview
stroke="#ef2929"
fill="#f57900"
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="0.25490196"
inkscape:pageopacity="0.0"
inkscape:pageshadow="2"
inkscape:zoom="4.0446508"
inkscape:cx="99.116732"
inkscape:cy="42.537095"
inkscape:current-layer="layer1"
showgrid="true"
inkscape:grid-bbox="true"
inkscape:document-units="px"
inkscape:showpageshadow="false"
inkscape:window-width="1920"
inkscape:window-height="1021"
inkscape:window-x="0"
inkscape:window-y="22"
width="400px"
height="300px"
inkscape:snap-nodes="true"
inkscape:snap-bbox="false"
objecttolerance="7"
gridtolerance="12"
guidetolerance="13"
inkscape:window-maximized="1"
inkscape:pagecheckerboard="false"
showguides="true"
inkscape:guide-bbox="true"
inkscape:locked="false"
inkscape:measure-start="0,0"
inkscape:measure-end="0,0"
inkscape:object-nodes="true"
inkscape:bbox-nodes="true"
inkscape:snap-global="true"
inkscape:object-paths="true"
inkscape:snap-intersection-paths="true"
inkscape:snap-bbox-edge-midpoints="true"
inkscape:snap-bbox-midpoints="true"
showborder="false"
inkscape:snap-center="true"
inkscape:snap-object-midpoints="true"
inkscape:snap-midpoints="true"
inkscape:snap-smooth-nodes="true">
<inkscape:grid
type="xygrid"
id="grid5883"
spacingx="2"
spacingy="2"
enabled="true"
visible="true"
empspacing="4"
originx="0"
originy="0" />
<sodipodi:guide
position="64,8"
orientation="0,1"
id="guide1073"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="12,64"
orientation="1,0"
id="guide1075"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,104"
orientation="0,1"
id="guide1099"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,128"
orientation="0,1"
id="guide993"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="104,64"
orientation="1,0"
id="guide995"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="9.2651362e-08,64"
orientation="1,0"
id="guide867"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="120,64"
orientation="1,0"
id="guide869"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,116"
orientation="0,1"
id="guide871"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<inkscape:grid
type="xygrid"
id="grid873"
spacingx="1"
spacingy="1"
empspacing="8"
color="#000000"
opacity="0.49019608"
empcolor="#000000"
empopacity="0.08627451"
dotted="true" />
<sodipodi:guide
position="24,64"
orientation="1,0"
id="guide877"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="116,64"
orientation="1,0"
id="guide879"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,120"
orientation="0,1"
id="guide881"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,12"
orientation="0,1"
id="guide883"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="8,64"
orientation="1,0"
id="guide885"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="128,64"
orientation="1,0"
id="guide887"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,0"
orientation="0,1"
id="guide897"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,24"
orientation="0,1"
id="guide899"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="256,256"
orientation="-0.70710678,0.70710678"
id="guide950"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,64"
orientation="0.70710678,0.70710678"
id="guide952"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
</sodipodi:namedview>
<metadata
id="metadata4">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:creator>
<cc:Agent>
<dc:title>GNOME Design Team</dc:title>
</cc:Agent>
</dc:creator>
<dc:source />
<cc:license
rdf:resource="http://creativecommons.org/licenses/by-sa/4.0/" />
<dc:title>Adwaita Icon Template</dc:title>
<dc:subject>
<rdf:Bag />
</dc:subject>
<dc:date />
<dc:rights>
<cc:Agent>
<dc:title />
</cc:Agent>
</dc:rights>
<dc:publisher>
<cc:Agent>
<dc:title />
</cc:Agent>
</dc:publisher>
<dc:identifier />
<dc:relation />
<dc:language />
<dc:coverage />
<dc:description />
<dc:contributor>
<cc:Agent>
<dc:title />
</cc:Agent>
</dc:contributor>
</cc:Work>
<cc:License
rdf:about="http://creativecommons.org/licenses/by-sa/4.0/">
<cc:permits
rdf:resource="http://creativecommons.org/ns#Reproduction" />
<cc:permits
rdf:resource="http://creativecommons.org/ns#Distribution" />
<cc:requires
rdf:resource="http://creativecommons.org/ns#Notice" />
<cc:requires
rdf:resource="http://creativecommons.org/ns#Attribution" />
<cc:permits
rdf:resource="http://creativecommons.org/ns#DerivativeWorks" />
<cc:requires
rdf:resource="http://creativecommons.org/ns#ShareAlike" />
</cc:License>
</rdf:RDF>
</metadata>
<g
fill="#27628a"
stroke="none">
<path
d="M0 5120 l0 -5120 3000 0 3000 0 0 5120 0 5120 -3000 0 -3000 0 0 -5120z" />
</g>
<g
fill="#7fcae7"
stroke="none">
<path
d="M 0,5120 V 0 h 3000 3000 v 5120 5120 H 3000 0 Z m 3041,3965 c 257.1951,-231.2173 270.8768,-244.4494 1132,-978 100.0843,-559.7796 173.9788,-986.5359 279,-1586 -165.7863,-405.0485 -178.8353,-430.8722 -292,-721 650.6072,-1421.1218 667.3936,-1452.2872 1190,-2550 -2109.4504,-0.035 -2130.9695,-0.025 -4468.86586,0.037 72.33788,69.7996 74.76441,71.6861 148.86586,140.963 -129.0483,91.5488 -134.68166,93.6858 -367,225 175.86245,383.2532 323.97381,668.4741 527,1073 35.6121,292.0899 72.3384,584.0406 109,876 5.074,391.6586 9.0034,783.3294 13,1175 314.3202,597.9247 654.4179,1182.5892 964,1783 88.7542,312.5107 121.9361,512.8332 194,862 95.2778,168.6736 102.3771,181.1881 273,473 113.1881,-286.567 245.9452,-613.0146 298,-773 z" />
</g>
<g
fill="#c0dede"
stroke="none">
<path
d="M0 1625 l0 -1625 3000 0 3000 0 0 1625 0 1625 -3000 0 -3000 0 0 -1625z" />
</g>
<g
fill="#ffffff"
stroke="none">
<path
d="M 881.01695,3249.9206 C 1286.0459,3091.4742 1546.5278,3035.4925 1889,2924 c 129.95,-482.4131 173.4726,-686.2614 331,-1262 132.796,95.3371 216.2935,142.9991 359,242 116.2556,-360.389 199.5642,-636.2515 320,-1025 108.0281,-100.84978 136.3812,-131.67871 296,-299 10,0 254,309 487,616 83.6789,470.193 92.832,516.3155 215,1032 422.9371,260.0129 459.4089,278.2641 878,528 0,69.3333 0,138.6667 0,208 253.7343,134.9322 263.2776,139.2776 570,286 H 3107 c -2226,0 -2219.49894,-0.1145 -2225.98305,-0.079 z" />
id="layer1"
inkscape:label="Icon"
inkscape:groupmode="layer"
style="display:inline"
transform="translate(0,-172)">
<g
inkscape:groupmode="layer"
id="layer2"
inkscape:label="baseplate"
style="display:none">
<text
xml:space="preserve"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.33333349px;line-height:125%;font-family:Cantarell;-inkscape-font-specification:'Cantarell, Normal';text-align:start;writing-mode:lr-tb;text-anchor:start;display:inline;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.33264872;enable-background:new"
x="7.9499588"
y="148.65199"
id="context"
inkscape:label="context"><tspan
sodipodi:role="line"
id="tspan2716"
x="7.9499588"
y="148.65199"
style="font-size:5.33333349px;stroke-width:0.33264872">apps</tspan></text>
<text
inkscape:label="icon-name"
id="text3021"
y="157.23398"
x="7.7533054"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.33333349px;line-height:125%;font-family:Cantarell;-inkscape-font-specification:'Cantarell, Bold';text-align:start;writing-mode:lr-tb;text-anchor:start;display:inline;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.33264872;enable-background:new"
xml:space="preserve"><tspan
y="157.23398"
x="7.7533054"
id="tspan3023"
sodipodi:role="line"
style="font-size:5.33333349px;stroke-width:0.33264872">org.gnome.</tspan></text>
<g
style="display:inline;fill:#000000;enable-background:new"
transform="matrix(7.9911709,0,0,8.0036407,-167.7909,-4846.0776)"
id="g12027"
inkscape:export-xdpi="12"
inkscape:export-ydpi="12" />
<rect
style="display:inline;overflow:visible;visibility:visible;fill:#f0f0f0;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.5;marker:none;enable-background:accumulate"
id="rect13805"
width="128"
height="128"
x="9.2651362e-08"
y="172"
inkscape:label="512x512" />
<g
id="g883"
style="fill:none;fill-opacity:0.25098039;stroke:#a579b3;stroke-opacity:1"
transform="translate(-24,24)" />
<g
id="g900"
style="fill:none;fill-opacity:0.25098039;stroke:#a579b3;stroke-opacity:1"
transform="translate(-24,24)" />
<g
id="g1168"
transform="matrix(0.25,0,0,0.25,6.9488522e-8,225)">
<circle
cx="256"
cy="44"
r="240"
id="path1142"
style="opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal" />
<rect
ry="32"
rx="32"
y="-180"
x="96"
height="448"
width="319.99979"
id="rect1110"
style="opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal" />
<rect
ry="32"
rx="32"
y="-164"
x="48"
height="416"
width="416"
id="rect1110-8"
style="display:inline;opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal;enable-background:new" />
<rect
ry="32"
rx="32"
y="-116"
x="32"
height="320"
width="448"
id="rect1110-8-9"
style="display:inline;opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal;enable-background:new" />
</g>
</g>
<g
inkscape:groupmode="layer"
id="layer9"
inkscape:label="hires"
style="display:none" />
<g
id="g944"
transform="matrix(1,0,0,0.93868822,0,14.545966)">
<path
style="fill:#99c1f1;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.41013032;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 50.53899,195.25817 6.396029,-11.43484 1.082405,-0.87215 4.821622,-10.46578 0.885604,-0.38763 2.558412,4.74837 2.755213,9.59364 1.672808,1.35667 3.542417,-0.87215 5.707227,12.59771 12.988859,9.59364 3.050415,3.87621 v 2.71335 l -16.334476,-1.25977 -7.084833,1.45359 -4.428021,-0.38763 -7.084833,0.29072 -11.414452,-0.58143 -3.640817,0.96905 -9.052843,-1.64739 -2.066409,0.0969 -1.476008,-0.48452 1.377607,-1.45358 1.869609,-1.06596 6.002428,-11.04722 1.279206,0.48453 5.412025,-6.49267 z"
id="path3455"
inkscape:connector-curvature="0" />
<path
style="fill:#241f31;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 49.358184,215.31759 -3.444016,0.9206 -9.003641,-1.74429 -1.918809,0.24226 -1.623608,-0.58143 1.574407,-1.50204 1.722008,-0.96905 5.953228,-11.09567 1.279205,0.53298 5.510426,-6.54112 0.344401,0.29072 -4.969223,10.27197 2.214011,1.93811 -0.246001,4.45765 z"
id="path3459"
inkscape:connector-curvature="0" />
<path
style="fill:#241f31;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 50.438601,195.22912 6.470906,-11.5803 1.113274,-0.6167 4.870575,-10.62099 0.904535,-0.41113 -0.417479,3.3576 0.626218,0.89079 0.834954,15.89722 1.391594,3.70021 -3.687722,5.34476 0.208739,1.37044 -0.347898,5.68737 1.87865,3.28908 7.375442,2.19272 1.252433,2.19272 -0.487057,0.13704 -4.244358,-0.54818 -6.540486,0.41114 -2.435287,-2.19272 -0.626216,-4.24839 -2.087389,-6.16703 -4.035619,-3.42612 -2.087388,-4.38544"
id="path3461"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 32.069579,218.11563 c -0.06958,-0.27409 0.695796,-1.23341 0.695796,-1.23341 l 2.783185,-0.0685 1.739491,2.26124 4.661836,5.13919 0.139158,1.57602 -4.174778,5.96145 -0.487057,6.16703 -2.922344,2.26124 -0.06958,1.57601 h -1.113274 l -1.322013,-3.08351 2.017809,-14.86938 z"
id="path3400"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 48.83827,222.43255 1.600331,-3.01499 -0.695796,-0.75375 -5.635951,-1.16488 -3.200663,0.82227 -0.06958,1.50749 1.53075,0.75375 1.461174,2.67237 -0.208739,1.71307 1.739489,1.02783 2.296129,-0.54818 z"
id="path3402"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 51.203977,217.70449 1.113274,-0.68522 2.365707,1.02784 1.322013,2.67237 -2.226548,2.26125 -1.322013,-0.82227 -1.322013,-0.61671 0.834956,-1.71306 z"
id="path3404"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 43.758957,226.61242 1.948228,0.68522 0.417479,1.91863 -0.626216,1.30193 -1.182854,0.34261 -1.113275,1.02784 -0.765376,3.63169 0.626218,3.01499 -1.252435,0.68522 -0.487057,-0.41113 -0.278319,-1.5075 -1.80907,-1.37045 -0.765376,-3.49464 3.618141,-3.42613 1.669912,-2.67237"
id="path3406"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 50.57776,223.25481 0.13916,0.68523 -2.783187,3.83726 0.06958,1.64454 -0.626218,1.50749 -1.60033,1.43897 -0.06958,0.75375 1.600333,1.91863 1.182854,3.08351 0.974114,0.68523 1.669911,-2.80942 -0.278318,-3.22056 3.966039,-3.3576 0.695796,-1.09636 -3.270243,-4.45396 z"
id="path3408"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 51.839954,236.39856 -0.834826,1.58948 0.166966,1.26061 1.057445,1.97315 0.500896,-0.32886 0.389584,-1.7539 1.447031,-1.151 2.337512,-4.0559 -0.22262,-1.04138 -1.947927,-1.69909 -2.114892,1.31542 0.278276,3.39819 z"
id="path3410"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 57.593778,229.84236 -1.043694,1.09636 0.765375,0.89079 1.043695,-0.20556 v -1.43898 z"
id="path3412"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 59.344793,218.25267 -0.765376,2.19272 -0.695796,0.27409 -0.695796,1.91863 -2.226548,2.26124 2.574446,3.56317 h 1.182854 l 0.487057,0.75375 0.626217,1.09636 1.948229,1.30193 2.922346,-0.6167 1.53075,-2.26125 -1.043694,-3.3576 -1.043693,-1.64454 1.322011,-2.60385 -0.904535,-1.37045 -2.226548,0.0685 z"
id="path3416"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 72.150522,238.17554 -0.518261,1.78635 1.036524,2.16915 1.684349,-2.04155 -0.647826,-2.16915 z"
id="path3418"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 66.789813,223.66595 1.600333,-0.75375 1.739489,-4.11135 2.922346,0.75375 1.322013,0.41114 0.139159,6.7152 -1.461172,1.02784 -2.226548,4.17987 -0.834956,-0.41114 -0.626216,0.95932 -2.574448,-0.61671 0.904537,-3.08351 z"
id="path3422"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 77.505077,218.59529 1.182854,-0.20557 2.435287,1.30193 -0.974115,1.02783 -2.087389,3.63169 -1.391593,0.0685 -1.113274,-0.61671 1.043695,-2.19271 z"
id="path3426"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 73.539038,231.06638 1.043695,-1.30193 1.043694,-2.80942 4.522676,1.71306 -0.974115,2.87795 -1.94823,-0.41114 -1.80907,1.09636 z"
id="path3428"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 78.200873,225.6531 7.932079,-7.94861 3.339822,1.09636 0.974115,0.13705 1.600331,-1.02784 3.339822,0.0685 -5.079314,12.81371 -3.200663,-1.98715 0.139161,-1.16489 -0.695798,-0.6167 -0.208737,-1.16488 -1.043696,0.27409 -3.200663,2.39829 z"
id="path3430"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 81.401536,230.99786 c 0,-0.2741 2.156968,-1.98716 2.156968,-1.98716 l 2.017811,1.30193 -0.904535,2.32976 -1.182855,0.75375 z"
id="path3432"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 81.679855,237.8501 0.765375,-1.91863 0.208739,-1.2334 2.156969,0.20557 2.156968,-2.87795 3.409403,1.02784 -0.904535,2.80942 -0.904535,0.34261 -0.626218,2.80943 1.043694,4.72805 -0.904535,1.09636 -1.80907,-2.19272 -0.626217,-1.37045 z"
id="path3434"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 78.131294,238.60385 0.626216,3.08351 -0.626216,3.22056 0.765375,0.95931 -0.626216,5.68737 2.504866,2.32976 1.87865,-0.47965 0.417478,-3.35761 1.669911,-0.0685 3.757301,-1.8501 -0.20874,-1.98716 -2.226548,-0.20556 -1.182854,-3.01499 -3.200662,-2.05568 -1.252434,-2.39828 z"
id="path3436"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 84.532619,251.41755 -0.278318,1.43898 -0.695797,0.6167 1.322013,2.67238 2.365709,-0.20557 1.53075,-2.94647 -2.365707,-1.98715 z"
id="path3438"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 64.993183,249.51154 -1.14959,2.51583 0.766392,1.69818 2.618509,0.25159 0.702526,1.19502 1.021857,2.39003 -0.574794,2.32714 3.89583,1.88688 0.95799,-1.06923 0.510928,-4.59139 -4.023561,-2.70451 -0.127732,-4.21402 z"
id="path3440"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 72.912822,251.00642 h 1.391592 l 2.574446,0.75375 1.391593,1.98715 1.461172,1.30193 -0.139159,3.42612 -3.409402,1.57602 -0.974115,-1.85011 0.626217,-3.3576 -3.270243,-1.85011 z"
id="path3442"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 72.147446,264.77944 1.80907,-1.98715 3.339822,-1.85011 1.322013,-0.0685 4.661835,-3.63169 1.391594,0.34261 0.556637,4.52248 -3.200664,4.04283 -2.852765,-0.82227 -1.80907,0.54818 -0.765376,1.43897 -2.087389,0.68522 z"
id="path3444"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 75.06979,272.93361 0.765376,-1.30192 1.252433,-0.41114 0.904535,-2.87794 1.94823,-0.61671 0.556637,2.60386 -3.339822,6.0985 -1.391593,-0.0685 z"
id="path3446"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 71.451649,268.20556 -1.252433,1.85011 2.504867,1.98715 0.765376,0.82227 1.73949,-2.39829 -2.296127,-2.80942 -1.461173,0.27409 z"
id="path3448"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 62.24531,254.0948 1.461172,1.02784 1.948229,0.54818 0.487058,1.64454 -1.461173,2.67237 -0.06958,1.78159 -1.669911,1.85011 -1.252433,-2.05568 0.487057,-2.80942 -1.391593,-0.34261 -0.904535,-2.80942 z"
id="path3450"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 47.585836,246.55246 -0.695796,3.70021 -0.139159,1.37045 1.87865,0.68523 1.391592,0.95931 1.809071,-1.64454 -0.417478,-0.95931 z"
id="path3452"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 54.682958,247.78586 -1.043694,1.02784 0.208739,1.98715 1.600331,0.89079 0.626217,-0.47965 0.06958,-2.26125 z"
id="path3454"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 48.629531,258.95503 4.800994,-6.16703 3.409402,0.82227 0.556637,1.78159 3.131083,4.79657 -1.669911,5.82441 -3.200663,-1.37045 -0.417478,-3.49464 -2.087388,1.30192 z"
id="path3456"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 45.915924,252.71948 -0.487056,1.98715 1.60033,1.57602 1.461174,-0.20557 -0.347899,-2.19272 z"
id="path3458"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 67.555189,261.6274 -1.80907,2.80943 -2.435287,8.42826 2.783185,3.76874 1.461172,-0.0685 1.113274,-2.12419 1.043696,-0.20557 0.487057,-1.09636 -1.043694,-4.45396 1.182853,-4.31692 z"
id="path3460"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 58.718577,267.79443 1.600331,-1.23341 2.017809,1.71306 -0.904535,1.85011 z"
id="path3462"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 58.509838,276.49678 2.156968,-4.591 1.391593,-0.27409 0.834955,1.50749 -2.017809,5.13919 z"
id="path3464"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 71.242911,274.02997 1.391592,0.20557 1.043694,3.01499 2.01781,0.68522 1.530751,1.57602 -0.904535,2.87795 -2.365707,2.32976 -0.139159,3.56317 -1.322013,1.98715 -2.504867,-1.85011 -0.278318,-2.67237 -1.530752,-1.78159 -1.113274,-3.08351 3.61814,-4.17987 z"
id="path3466"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 62.893354,276.5653 3.270244,1.16489 0.06958,3.70021 -0.556637,0.68523 0.974115,3.70021 1.252433,1.64454 0.06958,3.08351 -2.017809,1.37045 -2.574447,8.08566 -2.574447,-1.30193 -1.948229,-9.79872 z"
id="path3468"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 58.440258,283.5546 h 0.556637 l 0.417478,0.95931 -0.208739,1.30193 -1.461172,0.13704 z"
id="path3472"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 56.700767,279.16916 -1.113274,0.95931 0.834956,2.80943 1.600331,0.20556 0.487058,-2.05567 -0.695796,-1.91863 z"
id="path3474"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 53.152207,272.17987 0.139159,5.13918 1.87865,1.23341 0.834955,-0.54818 0.904535,-3.63169 1.530752,-1.57602 -1.669911,-3.97431 -3.548561,3.08352 z"
id="path3476"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 45.915924,258.33832 -0.208739,3.83726 -4.731414,3.97431 1.948229,2.80942 8.488716,0.82227 0.417478,1.98715 1.043694,-0.75375 0.487057,-2.19272 1.182854,-1.64454 -0.417478,-1.09635 -1.87865,-2.60386 -3.757299,-1.37045 -1.461174,-3.22056 z"
id="path3480"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 40.279975,263.68308 1.669912,0.6167 3.061502,-6.37259 -0.904535,-5.61884 -2.504867,-0.34262 -1.391592,-1.2334 2.156968,-7.606 -2.087388,-4.45396 -3.409402,1.57602 -0.834956,3.42612 -1.87865,0.20557 -0.347898,2.1242 1.530752,1.64454 h 1.322013 l 0.626217,3.90578 2.296127,5.61884 -0.347898,2.19272 z"
id="path3482"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 66.531337,247.61066 -0.590018,-0.31657 -0.420783,-1.71262 0.427793,-0.66945 1.306823,-1.13114 2.316342,-1.38746 1.06612,0.23465 -0.01701,2.21105 -2.36166,3.35302 z"
id="path4284"
inkscape:connector-curvature="0"
inkscape:transform-center-x="4.9927099"
inkscape:transform-center-y="-9.3161687" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 72.373733,232.22199 -0.815102,1.03206 4.017286,4.12827 1.571981,0.17201 1.339096,-0.86006 0.931544,0.63071 2.387083,-2.98152 -2.794634,-0.91739 -3.027519,0.22934 z"
id="path3601"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 57.407878,237.1102 -1.301737,2.34289 -1.301738,0.61888 -0.17955,1.45878 -4.488748,1.54719 -0.403989,1.50299 0.314213,0.30944 1.032412,0.0884 v 1.41457 l 1.660839,1.50299 2.154598,-1.94504 1.571064,0.35364 2.738136,-1.94504 -1.436399,-2.56392 0.987525,-3.44803 -0.583538,-1.37037 z"
id="path3603"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 62.104217,246.96003 5.843936,-6.55723 0.659867,-2.66044 2.221783,-0.40757 -0.386451,-3.39556 -2.000988,-0.60704 -6.246127,-0.36572 -2.624948,2.5137 1.519708,2.75102 -0.347742,5.51876 z"
id="path3605"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 71.024647,249.63275 5.822153,1.31875 1.047988,-3.89891 -1.280874,-1.43343 0.523995,-6.02038 -3.551515,5.275 0.34933,2.06413 -2.037753,0.80272 -1.164431,0.45869 z"
id="path3607"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 59.099222,247.24427 -2.095974,1.72011 -0.05822,1.60543 0.465772,1.72011 1.455539,0.97473 -0.407551,0.97473 2.328861,-0.34402 2.27064,-2.86685 -1.571981,-0.57337 -0.640437,-2.86685 -1.51376,-0.40136 z"
id="path3609"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 44.078067,234.34346 0.291107,4.47228 -1.863089,1.43342 2.095976,3.72691 2.037753,0.0573 2.27064,-3.55489 -2.969297,-4.98831 z"
id="path3611"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 44.660282,245.46683 -3.318627,4.30027 1.339096,1.26141 2.561747,-0.28668 1.222652,-3.15354 z"
id="path3613"
inkscape:connector-curvature="0" />
</g>
</g>
</svg>

Before

Width:  |  Height:  |  Size: 1.7 KiB

After

Width:  |  Height:  |  Size: 34 KiB

View File

@@ -0,0 +1,51 @@
# Exiftool
mat2 is in fact using exiftool to extract metadata from files,
but not to remove them. The previous iteration of mat2, MAT,
was using exiftool to remove metadata, which led to several cases where
they weren't correctly removed, if at all.
For example, [Exiftool's documentation](https://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/PDF.html)
states the following with regard to PDF:
> All metadata edits are reversible. While this would normally be considered an
advantage, it is a potential security problem because old information is never
actually deleted from the file.
To remove metadata, mat2 usually re-renders the file completely, eliminating
all possible original metadata. See the `implementation_notes.md` file for
details.
# jpegoptim, optipng, …
While designed to reduce the size of pictures as much as possible,
these programs can also be used to remove metadata. They usually have excellent
support for a single picture format, and can be used in place of mat2 for that format.
# PDF Redact Tools
[PDF Redact Tools](https://github.com/firstlookmedia/pdf-redact-tools) is
software developed by the people from [First Look
Media](https://firstlook.media/), the entity behind, amongst other things,
[The Intercept](https://theintercept.com/).
The tool uses roughly the same approach as mat2 to deal with PDF,
which is unfortunately the only fileformat that it does support.
It's interesting to note that it has counter-measures against
[yellow dots](https://en.wikipedia.org/wiki/Machine_Identification_Code),
a capacity that mat2 doesn't have.
# Exiv2
[Exiv2](https://www.exiv2.org/) was considered for mat2,
but it currently misses a lot of metadata.
# Other non-open-source software/online services
There is a lot of closed-source software, and many online services, claiming to remove
metadata from your files, but since there is no way to actually verify that
they're effectively removing them, let alone that they aren't adding unique
markers of their own, they shouldn't be used.

View File

@@ -4,41 +4,57 @@ Implementation notes
Lightweight cleaning mode
-------------------------
Due to *popular* request, MAT2 is providing a *lightweight* cleaning mode,
Due to *popular* request, mat2 is providing a *lightweight* cleaning mode,
that only cleans the superficial metadata of your file, but not
the ones that might be in **embeded** resources. Like for example,
the ones that might be in **embedded** resources. Like for example,
images in a PDF or an office document.
Revisions handling
------------------
Revisions are handled according to the principle of least astonishment: they are entirely removed.
Revisions are handled according to the principle of least astonishment: they
are entirely removed.
- Either the users aren't aware of the revisions, are thus they should be deleted. For example journalists that are editing a document to erase mentions sources mentions.
- Either the users aren't aware of the revisions, and thus they should be
deleted. For example, journalists who are editing a document to erase
mentions of their sources.
- Or they are aware of it, and will likely not expect MAT2 to be able to keep the revisions, that are basically traces about how, when and who edited the document.
- Or they are aware of it, and will likely not expect mat2 to be able to keep
the revisions, that are basically traces about how, when and who edited the
document.
Race conditions
---------------
MAT2 does its very best to avoid crashing at runtime. This is why it's checking
if the file is valid __at parser creation__. MAT2 doesn't take any measure to
mat2 does its very best to avoid crashing at runtime. This is why it's checking
if the file is valid __at parser creation__. mat2 doesn't take any measure to
ensure that the file is not changed between the time the parser is
instantiated, and the call to clean or show the metadata.
Symlink attacks
---------------
MAT2 output predictable filenames (like yourfile.jpg.cleaned).
mat2 outputs predictable filenames (like yourfile.cleaned.jpg).
This may lead to symlink attacks. Please check that your OS protects
against them.
Archives handling
-----------------
MAT2 doesn't support archives yet, because we haven't found an usable way to ask the user
what to do when a non-supported files are encountered.
By default, when cleaning a non-supported file format in an archive,
mat2 will abort with a detailed error message.
While strongly discouraged, it's possible to override this behaviour to force
the exclusion, or inclusion of unknown files into the cleaned archive.
While Python's [zipfile](https://docs.python.org/3/library/zipfile.html) module
provides a *safe* way to extract members of a zip archive, the
[tarfile](https://docs.python.org/3/library/tarfile.html) one doesn't,
meaning that it's up to mat2 to implement safety checks. Currently,
it defends against path-traversal, both relative and absolute,
symlink-related attacks, setuid/setgid attacks, duplicate members, block and
char devices, … but there might still be dragons lurking there.
PDF handling
------------
@@ -49,10 +65,10 @@ didn't remove any *deep metadata*, like the ones in embedded pictures. This was
one of the reasons MAT was abandoned: the absence of a satisfying solution to
handle PDF. But apparently, people are ok with [pdf redact
tools](https://github.com/firstlookmedia/pdf-redact-tools), that simply
transform the PDF into images. So this is what's MAT2 is doing too.
transform the PDF into images. So this is what mat2 is doing too.
Of course, it would be possible to detect images in a PDF file, and process them
with MAT2, but since a PDF can contain a lot of things, like images, videos,
with mat2, but since a PDF can contain a lot of things, like images, videos,
javascript, pdf, blobs, … this is the easiest and safest way to clean them.
Images handling
@@ -61,3 +77,11 @@ Images handling
When possible, images are handled like PDF: rendered on a surface, then saved
to the filesystem. This ensures that every metadata is removed.
XML attacks
-----------
Since our threat model conveniently excludes files crafted to specifically
bypass mat2, fileformats containing harmful XML are out of our scope.
But since mat2 is using [etree](https://docs.python.org/3/library/xml.html#xml-vulnerabilities)
to process XML, it's "only" vulnerable to DoS, and not memory corruption:
odds are that the user will notice that the cleaning didn't succeed.

99
doc/mat2.1 Normal file
View File

@@ -0,0 +1,99 @@
.TH mat2 "1" "January 2025" "mat2 0.13.5" "User Commands"
.SH NAME
mat2 \- the metadata anonymisation toolkit 2
.SH SYNOPSIS
\fBmat2\fR [\-h] [\-v] [\-l] [\-V] [\-s | \-L] [\fIfiles\fR [\fIfiles ...\fR]]
.SH DESCRIPTION
.B mat2
removes metadata from various fileformats. It supports a wide variety of file
formats, audio, office, images, …
Careful, mat2 does not clean files in-place, instead, it will produce a file with the word
"cleaned" between the filename and its extension, for example "filename.cleaned.png"
for a file named "filename.png".
.SH OPTIONS
.SS "positional arguments:"
.TP
\fBfiles\fR
the files to process
.SS "optional arguments:"
.TP
\fB\-h\fR, \fB\-\-help\fR
show this help message and exit
.TP
\fB\-v\fR, \fB\-\-version\fR
show program's version number and exit
.TP
\fB\-l\fR, \fB\-\-list\fR
list all supported fileformats
.TP
\fB\-\-check\-dependencies\fR
check if mat2 has all the dependencies it needs
.TP
\fB\-V\fR, \fB\-\-verbose\fR
show more verbose status information
.TP
\fB\-\-unknown-members\fR \fIpolicy\fR
how to handle unknown members of archive-style files (policy should be one of: abort, omit, keep)
.TP
\fB\-s\fR, \fB\-\-show\fR
list harmful metadata detectable by mat2 without removing them
.TP
\fB\-L\fR, \fB\-\-lightweight\fR
remove SOME metadata
.TP
\fB\-\-no\-sandbox\fR
disable bubblewrap's sandboxing
.TP
\fB\-\-inplace\fR
clean in place, without backup
.SH EXAMPLES
To remove all the metadata from a PDF file:
.PP
.nf
.RS
mat2 ./myfile.pdf
.RE
.fi
.PP
.SH NOTES ABOUT METADATA
While mat2 is doing its very best to display metadata when the --show flag is
passed, it doesn't mean that a file is clean from any metadata if mat2 doesn't
show any. There is no reliable way to detect every single possible metadata for
complex file formats.
.PP
This is why you shouldn't rely on metadata's presence to decide if your file must
be cleaned or not.
.PP
Moreover, mat2 goes to great lengths to make sure that as much metadata as
possible is removed. This might sometimes result in a loss of quality of the
processed files. For example, a text-based PDF file converted into an image-based
one means that it will no longer be possible to select text in it. If you're
experiencing this, you might want to give the lightweight cleaning mode a try,
but keep in mind that by doing so, some metadata \fBwon't be cleaned\fR.
.SH BUGS
While mat2 does its very best to remove every single metadata,
it's still in beta, and \fBsome\fR might remain. Should you encounter
some issues, check the bugtracker: https://github.com/jvoisin/mat2/issues
.PP
Please use accordingly and be careful.
.SH AUTHOR
This software was made by Julien (jvoisin) Voisin with the support of the Tails project.
.SH COPYRIGHT
This software is released under LGPLv3.
.SH "SEE ALSO"
.BR exiftool (1p)
.BR pdf-redact-tools (1)

View File

@@ -3,7 +3,7 @@ Threat Model
The Metadata Anonymisation Toolkit 2 adversary has a number
of goals, capabilities, and counter-attack types that can be
used to guide us towards a set of requirements for the MAT2.
used to guide us towards a set of requirements for mat2.
This is an overhaul of MAT's (the first iteration of the software) one.
@@ -53,7 +53,7 @@ Adversary
user. This is the strongest position for the adversary to
have. In this case, the adversary is capable of inserting
arbitrary, custom watermarks specifically for tracking
the user. In general, MAT2 cannot defend against this
the user. In general, mat2 cannot defend against this
adversary, but we list it for completeness' sake.
- The adversary created the document for a group of users.
@@ -65,7 +65,7 @@ Adversary
- The adversary did not create the document, the weakest
position for the adversary to have. The file format is
(most of the time) standard, nothing custom is added:
MAT2 must be able to remove all metadata from the file.
mat2 must be able to remove all metadata from the file.
Requirements
@@ -73,28 +73,28 @@ Requirements
* Processing
- MAT2 *should* avoid interactions with information.
- mat2 *should* avoid interactions with information.
Its goal is to remove metadata, and the user is solely
responsible for the information of the file.
- MAT2 *must* warn when encountering an unknown
format. For example, in a zipfile, if MAT encounters an
- mat2 *must* warn when encountering an unknown
format. For example, in a zipfile, if mat2 encounters an
unknown format, it should warn the user, and ask if the
file should be added to the anonymised archive that is
produced.
- MAT2 *must* not add metadata, since its purpose is to
- mat2 *must* not add metadata, since its purpose is to
anonymise files: every added item of metadata decreases
anonymity.
- MAT2 *should* handle unknown/hidden metadata fields,
- mat2 *should* handle unknown/hidden metadata fields,
like proprietary extensions of open formats.
- MAT2 *must not* fail silently. Upon failure,
MAT2 *must not* modify the file in any way.
- mat2 *must not* fail silently. Upon failure,
mat2 *must not* modify the file in any way.
- MAT2 *might* leak the fact that MAT2 was used on the file,
- mat2 *might* leak the fact that mat2 was used on the file,
since it might be uncommon for some file formats to come
without any kind of metadata, an adversary might suspect that
the user used MAT2 on certain files.
the user used mat2 on certain files.

14
dolphin/README.md Normal file
View File

@@ -0,0 +1,14 @@
Dolphin integration
===================
Thanks to [Miguel Marco](https://riemann.unizar.es/~mmarco/), here is a neat
integration for [Dolphin](https://kde.org/applications/system/org.kde.dolphin),
the KDE file manager:
1. Add the `mat2.desktop` file either in
- `/usr/share/kservices5/ServiceMenus/` to install it globally
- `~/.local/share/kservices5/ServiceMenus/` for a specific user
2. Run `kbuildsycoca5` to update the corresponding database
3. Enjoy your new contextual menu to remove metadata from your files!

13
dolphin/mat2.desktop Normal file
View File

@@ -0,0 +1,13 @@
[Desktop Entry]
X-KDE-ServiceTypes=KonqPopupMenu/Plugin
MimeType=application/pdf;application/vnd.oasis.opendocument.chart;application/vnd.oasis.opendocument.formula;application/vnd.oasis.opendocument.graphics;application/vnd.oasis.opendocument.image;application/vnd.oasis.opendocument.presentation;application/vnd.oasis.opendocument.spreadsheet;application/vnd.oasis.opendocument.text;application/vnd.openxmlformats-officedocument.presentationml.presentation;application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;application/vnd.openxmlformats-officedocument.wordprocessingml.document;application/x-bittorrent;application/zip;audio/flac;audio/mpeg;audio/ogg;audio/x-flac;image/jpeg;image/png;image/tiff;image/x-ms-bmp;text/plain;video/mp4;video/x-msvideo;
Actions=cleanMetadata;
Type=Service
[Desktop Action cleanMetadata]
Name=Clean metadata
Name[de]=Metadaten löschen
Name[es]=Limpiar metadatos
Icon=/usr/share/icons/hicolor/scalable/apps/mat2.svg
Exec=kdialog --yesno "$( mat2 -s %F )" --title "Clean Metadata?" && mat2 %U
Exec[de]=kdialog --yesno "$( mat2 -s %F )" --title "Metadaten löschen?" && mat2 %U

View File

@@ -1,7 +1,13 @@
#!/bin/env python3
#!/usr/bin/env python3
import enum
import importlib
from typing import Dict
from . import exiftool, video
# A set of extension that aren't supported, despite matching a supported mimetype
unsupported_extensions = {
UNSUPPORTED_EXTENSIONS = {
'.asc',
'.bat',
'.brf',
@@ -17,3 +23,74 @@ unsupported_extensions = {
'.xsd',
'.xsl',
}
# Python modules that mat2 needs, keyed by a human-readable label.
# Every entry is mandatory; `check_dependencies` tries to import each
# 'module' to find out whether it is available.
DEPENDENCIES = {
    label: {'module': module, 'required': True}
    for label, module in (
        ('Cairo', 'cairo'),
        ('PyGobject', 'gi'),
        ('GdkPixbuf from PyGobject', 'gi.repository.GdkPixbuf'),
        ('Poppler from PyGobject', 'gi.repository.Poppler'),
        ('GLib from PyGobject', 'gi.repository.GLib'),
        ('Mutagen', 'mutagen'),
    )
}
# External command-line tools, keyed by a human-readable label.
# 'cmd' is a callable that `check_dependencies` invokes; a RuntimeError
# raised by it is reported as "not found". None of them is mandatory.
CMD_DEPENDENCIES = {
    'Exiftool': dict(cmd=exiftool._get_exiftool_path, required=False),
    'Ffmpeg': dict(cmd=video._get_ffmpeg_path, required=False),
}
def check_dependencies() -> Dict[str, Dict[str, bool]]:
    """Report the availability of every dependency of mat2.

    Python modules listed in `DEPENDENCIES` are probed by importing them,
    and the tools listed in `CMD_DEPENDENCIES` by calling their 'cmd' hook.

    :returns: a dict mapping each dependency label to a
        `{'found': bool, 'required': bool}` dict.
    """
    report: Dict[str, Dict] = {}

    for label, spec in DEPENDENCIES.items():
        try:
            importlib.import_module(spec['module'])  # type: ignore
        except ImportError:  # pragma: no cover
            available = False
        else:
            available = True
        report[label] = {'found': available, 'required': spec['required']}

    for label, spec in CMD_DEPENDENCIES.items():
        try:
            spec['cmd']()  # type: ignore
        except RuntimeError:  # pragma: no cover
            available = False
        else:
            available = True
        report[label] = {'found': available, 'required': spec['required']}

    return report
@enum.unique
class UnknownMemberPolicy(enum.Enum):
    """What to do with archive members whose format is unknown.

    The values are the policy names: 'abort', 'omit' and 'keep'.
    """

    ABORT = 'abort'
    OMIT = 'omit'
    KEEP = 'keep'

View File

@@ -1,27 +1,48 @@
import abc
import os
from typing import Set, Dict
assert Set # make pyflakes happy
import re
from typing import Union, Set, Dict
class AbstractParser(abc.ABC):
meta_list = set() # type: Set[str]
mimetypes = set() # type: Set[str]
""" This is the base class of every parser.
It might yield `ValueError` on instantiation on invalid files,
and `RuntimeError` when something went wrong in `remove_all`.
"""
meta_list: Set[str] = set()
mimetypes: Set[str] = set()
def __init__(self, filename: str) -> None:
"""
:raises ValueError: Raised upon an invalid file
"""
if re.search('^[a-z0-9./]', filename) is None:
# Some parsers are calling external binaries,
# this prevents shell command injections
filename = os.path.join('.', filename)
self.filename = filename
fname, extension = os.path.splitext(filename)
# Special case for tar.gz, tar.bz2, … files
if fname.endswith('.tar') and len(fname) > 4:
fname, extension = fname[:-4], '.tar' + extension
self.output_filename = fname + '.cleaned' + extension
self.lightweight_cleaning = False
self.sandbox = True
@abc.abstractmethod
def get_meta(self) -> Dict[str, str]:
pass # pragma: no cover
def get_meta(self) -> Dict[str, Union[str, Dict]]:
"""Return all the metadata of the current file
:raises RuntimeError: Raised if the cleaning process went wrong.
"""
@abc.abstractmethod
def remove_all(self) -> bool:
pass # pragma: no cover
"""
Remove all the metadata of the current file
def remove_all_lightweight(self) -> bool:
""" Remove _SOME_ metadata. """
return self.remove_all()
:raises RuntimeError: Raised if the cleaning process went wrong.
"""

487
libmat2/archive.py Normal file
View File

@@ -0,0 +1,487 @@
import abc
import stat
import zipfile
import datetime
import tarfile
import tempfile
import os
import logging
import shutil
from typing import Pattern, Union, Any, Set, Dict, List
from . import abstract, UnknownMemberPolicy, parser_factory
# pylint: disable=not-callable,assignment-from-no-return,too-many-branches
# An ArchiveClass is a class representing an archive (a zip or a tar file),
# while an ArchiveMember is a class representing an element
# (usually a file) of such an archive.
ArchiveClass = Union[zipfile.ZipFile, tarfile.TarFile]
ArchiveMember = Union[zipfile.ZipInfo, tarfile.TarInfo]
class ArchiveBasedAbstractParser(abstract.AbstractParser):
"""Base class for all archive-based formats.
Welcome to a world of frustrating complexity and tediousness:
- A lot of file formats (docx, odt, epubs, …) are archive-based,
so we need to add callbacks everywhere to allow their respective
parsers to apply specific cleanup to the required files.
- Python has two different modules to deal with .tar and .zip files,
with similar-but-yet-o-so-different API, so we need to write
a ghetto-wrapper to avoid duplicating everything
- The combination of @staticmethod and @abstractstaticmethod is
required because for now, mypy doesn't know that
@abstractstaticmethod is, indeed, a static method.
- Mypy is too dumb (yet) to realise that a type A is valid under
the Union[A, B] constraint, hence the weird `# type: ignore`
annotations.
"""
# Tarfiles can optionally support compression
# https://docs.python.org/3/library/tarfile.html#tarfile.open
compression = ''
def __init__(self, filename):
"""Initialise the archive-handling state, then validate the archive.

:param filename: path of the archive; forwarded to the parent constructor.
:raises ValueError: if the file isn't a valid archive (see `is_archive_valid`).
"""
super().__init__(filename)
# We ignore typing here because mypy is too stupid
# NOTE(review): both start as None; presumably concrete parsers assign
# the matching zipfile/tarfile classes — confirm in the subclasses.
self.archive_class = None # type: ignore
self.member_class = None # type: ignore
# Those are the files that have a format that _isn't_
# supported by mat2, but that we want to keep anyway.
# (Patterns, matched against member names with `.search()` in `remove_all`.)
self.files_to_keep: Set[Pattern] = set()
# Those are the files that we _do not_ want to keep,
# no matter if they are supported or not.
# (Patterns, matched against member names with `.search()` in `remove_all`.)
self.files_to_omit: Set[Pattern] = set()
# what should the parser do if it encounters an unknown file in
# the archive?
self.unknown_member_policy: UnknownMemberPolicy = UnknownMemberPolicy.ABORT
# The validity check runs last, once every attribute above is set.
# The LGTM comment is to mask a false-positive,
# see https://lgtm.com/projects/g/jvoisin/mat2/
self.is_archive_valid() # lgtm [py/init-calls-subclass]
def is_archive_valid(self):
"""Raise a ValueError if the current archive isn't a valid one.

This base implementation performs no check at all.
"""
def _specific_cleanup(self, full_path: str) -> bool:
""" This method can be used to apply specific treatment
to files present in the archive."""
# pylint: disable=unused-argument
return True # pragma: no cover
def _specific_get_meta(self, full_path: str, file_path: str) -> Dict[str, Any]:
""" This method can be used to extract specific metadata
from files present in the archive."""
# pylint: disable=unused-argument
return {} # pragma: no cover
def _final_checks(self) -> bool:
""" This method is invoked after the file has been cleaned,
allowing to run final verifications.
"""
# pylint: disable=unused-argument
return True
@staticmethod
@abc.abstractmethod
def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]:
"""Return all the members of the archive.

Abstract: concrete archive parsers must implement it.
"""
@staticmethod
@abc.abstractmethod
def _clean_member(member: ArchiveMember) -> ArchiveMember:
"""Remove all the metadata for a given member, and return the member.

Abstract: concrete archive parsers must implement it.
"""
@staticmethod
@abc.abstractmethod
def _get_member_meta(member: ArchiveMember) -> Dict[str, str]:
"""Return all the metadata of a given member.

Abstract: concrete archive parsers must implement it.
"""
@staticmethod
@abc.abstractmethod
def _get_member_name(member: ArchiveMember) -> str:
"""Return the name of the given member.

Abstract: concrete archive parsers must implement it.
"""
@staticmethod
@abc.abstractmethod
def _is_dir(member: ArchiveMember) -> bool:
"""Return true if the given member is a directory.

Abstract: concrete archive parsers must implement it.
"""
@abc.abstractmethod
def _add_file_to_archive(self, archive: ArchiveClass, member: ArchiveMember,
full_path: str):
"""Add the file at full_path to the archive, via the given member.

Abstract: concrete archive parsers must implement it.
"""
@staticmethod
def _set_member_permissions(member: ArchiveMember, permissions: int) -> ArchiveMember:
    """Apply `permissions` to the archive member.

    The default implementation is a no-op.

    :returns: the member it was given, untouched.
    """
    # pylint: disable=unused-argument
    return member
@staticmethod
def _get_member_compression(member: ArchiveMember):
    """Tell which compression the archive member uses.

    The default implementation knows of none.

    :returns: None.
    """
    # pylint: disable=unused-argument
    return None
@staticmethod
def _set_member_compression(member: ArchiveMember, compression) -> ArchiveMember:
    """Apply `compression` to the archive member.

    The default implementation is a no-op.

    :returns: the member it was given, untouched.
    """
    # pylint: disable=unused-argument
    return member
def get_meta(self) -> Dict[str, Union[str, Dict]]:
    """Return the metadata of every member of the archive.

    Each member is extracted into a temporary folder, and its metadata is
    the merge of, in increasing priority: the archive-level metadata of the
    member, the format-specific ones (`_specific_get_meta`), and the ones
    found by the parser matching the member's file type, if there is one.

    :returns: a dict mapping member names to their metadata; members
        without any metadata are left out.
    """
    meta: Dict[str, Union[str, Dict]] = dict()

    # The temporary folder is a context manager, so that it gets removed
    # even when processing a member raises.
    with self.archive_class(self.filename) as zin, \
            tempfile.TemporaryDirectory() as temp_folder:
        for item in self._get_all_members(zin):
            local_meta = self._get_member_meta(item)
            member_name = self._get_member_name(item)

            if self._is_dir(item):  # pragma: no cover
                continue  # don't keep empty folders

            # Refuse to go on if a member would land outside of the
            # temporary folder (path traversal).
            full_path = os.path.join(temp_folder, member_name)
            if not os.path.abspath(full_path).startswith(temp_folder):
                logging.error("%s contains a file (%s) pointing outside (%s) of its root.",
                              self.filename, member_name, full_path)
                break

            try:
                zin.extract(member=item, path=temp_folder)
            except OSError as e:
                logging.error("Unable to extraxt %s from %s: %s", item, self.filename, e)
                # There might be no file on disk to inspect: keep what the
                # archive itself says about this member, and move on
                # instead of crashing on the missing file.
                if local_meta:
                    meta[member_name] = local_meta
                continue

            os.chmod(full_path, stat.S_IRUSR)

            specific_meta = self._specific_get_meta(full_path, member_name)
            local_meta = {**local_meta, **specific_meta}

            member_parser, _ = parser_factory.get_parser(full_path)  # type: ignore
            if member_parser:
                member_parser.sandbox = self.sandbox
                local_meta = {**local_meta, **member_parser.get_meta()}

            if local_meta:
                meta[member_name] = local_meta

    return meta
def remove_all(self) -> bool:
    """Write a cleaned copy of the archive to `self.output_filename`.

    Members are processed in lexicographic order (with `mimetype` forced
    first), extracted into a temporary folder, cleaned with the matching
    parser when there is one, stripped of their archive-level metadata
    and added to the output archive.

    Returns:
        True on success. False if any member could not be handled, in
        which case the output file is removed, or if `_final_checks`
        fails.
    """
    # pylint: disable=too-many-branches
    abort = False
    temp_folder = tempfile.mkdtemp()
    # The try/finally guarantees that the extracted files are removed
    # even if opening an archive or cleaning a member raises.
    try:
        with self.archive_class(self.filename) as zin,\
             self.archive_class(self.output_filename, 'w' + self.compression) as zout:

            # Sort the items to process, to reduce fingerprinting,
            # and keep them in the `items` variable.
            items: List[ArchiveMember] = list()
            for item in sorted(self._get_all_members(zin), key=self._get_member_name):
                # Some fileformats do require to have the `mimetype` file
                # as the first file in the archive.
                if self._get_member_name(item) == 'mimetype':
                    items.insert(0, item)
                else:
                    items.append(item)

            # Since files order is a fingerprint factor,
            # we're iterating (and thus inserting) them in lexicographic order.
            for item in items:
                member_name = self._get_member_name(item)
                if self._is_dir(item):
                    continue  # don't keep empty folders

                full_path = os.path.join(temp_folder, member_name)
                if not os.path.abspath(full_path).startswith(temp_folder):
                    logging.error("%s contains a file (%s) pointing outside (%s) of its root.",
                                  self.filename, member_name, full_path)
                    abort = True
                    break

                zin.extract(member=item, path=temp_folder)

                try:
                    original_permissions = os.stat(full_path).st_mode
                except FileNotFoundError:
                    logging.error("Something went wrong during processing of "
                                  "%s in %s, likely a path traversal attack.",
                                  member_name, self.filename)
                    abort = True
                    # we're breaking instead of continuing, because this exception
                    # is raised in case of weird path-traversal-like attacks.
                    break
                # Make sure the extracted file can be read and rewritten.
                os.chmod(full_path, original_permissions | stat.S_IWUSR | stat.S_IRUSR)

                original_compression = self._get_member_compression(item)

                if self._specific_cleanup(full_path) is False:
                    logging.warning("Something went wrong during deep cleaning of %s in %s",
                                    member_name, self.filename)
                    abort = True
                    continue

                if any(r.search(member_name) for r in self.files_to_keep):
                    # those files aren't supported, but we want to add them anyway
                    pass
                elif any(r.search(member_name) for r in self.files_to_omit):
                    continue
                else:  # supported files that we want to first clean, then add
                    member_parser, mtype = parser_factory.get_parser(full_path)  # type: ignore
                    if not member_parser:
                        if self.unknown_member_policy == UnknownMemberPolicy.OMIT:
                            logging.warning("In file %s, omitting unknown element %s (format: %s)",
                                            self.filename, member_name, mtype)
                            continue
                        elif self.unknown_member_policy == UnknownMemberPolicy.KEEP:
                            logging.warning("In file %s, keeping unknown element %s (format: %s)",
                                            self.filename, member_name, mtype)
                        else:
                            logging.error("In file %s, element %s's format (%s) "
                                          "isn't supported",
                                          self.filename, member_name, mtype)
                            abort = True
                            continue
                    else:
                        member_parser.sandbox = self.sandbox
                        if member_parser.remove_all() is False:
                            # Implicit concatenation instead of backslash
                            # continuations, which embed the indentation
                            # of the source file in the logged message.
                            logging.warning("In file %s, something went wrong "
                                            "with the cleaning of %s "
                                            "(format: %s)",
                                            self.filename, member_name, mtype)
                            abort = True
                            continue
                        # Replace the extracted file with its cleaned version.
                        os.rename(member_parser.output_filename, full_path)

                # Build a fresh member, so that only the permissions and
                # the compression of the original one are carried over.
                zinfo = self.member_class(member_name)  # type: ignore
                zinfo = self._set_member_permissions(zinfo, original_permissions)
                zinfo = self._set_member_compression(zinfo, original_compression)
                clean_zinfo = self._clean_member(zinfo)
                self._add_file_to_archive(zout, clean_zinfo, full_path)
    finally:
        shutil.rmtree(temp_folder)

    if abort:
        os.remove(self.output_filename)
        return False
    if not self._final_checks():
        return False  # pragma: no cover
    return True
class TarParser(ArchiveBasedAbstractParser):
    """Parser for tar archives, on top of the stdlib `tarfile` module."""
    mimetypes = {'application/x-tar'}

    def __init__(self, filename):
        super().__init__(filename)
        # yes, it's tarfile.open and not tarfile.TarFile,
        # as stated in the documentation:
        # https://docs.python.org/3/library/tarfile.html#tarfile.TarFile
        # This is required to support compressed archives.
        self.archive_class = tarfile.open
        self.member_class = tarfile.TarInfo

    def is_archive_valid(self):
        """Raise ValueError if the file isn't a tar archive, or isn't safe."""
        if tarfile.is_tarfile(self.filename) is False:
            raise ValueError
        self.__check_tarfile_safety()

    def __check_tarfile_safety(self):
        """Checks if the tarfile doesn't have any "suspicious" members.

        This is a rewrite of this patch: https://bugs.python.org/file47826/safetarfile-4.diff
        inspired by this bug from 2014: https://bugs.python.org/issue21109
        because Python's stdlib doesn't provide a way to "safely" extract
        things from a tar file.

        Raises:
            ValueError: on absolute paths, `..` path components, duplicated
                names, setuid/setgid files, symlinks pointing outside of
                the archive, device files and hardlinks.
        """
        names = set()
        with tarfile.open(self.filename) as f:
            members = f.getmembers()
        for member in members:
            name = member.name
            if os.path.isabs(name):
                raise ValueError("The archive %s contains a file with an " \
                        "absolute path: %s" % (self.filename, name))
            elif '..' in name.split('/'):
                # Any `..` component is refused, including a bare `..`
                # or a trailing `/..`.
                raise ValueError("The archive %s contains a file with an " \
                        "path traversal attack: %s" % (self.filename, name))

            if name in names:
                raise ValueError("The archive %s contains two times the same " \
                        "file: %s" % (self.filename, name))
            else:
                names.add(name)

            if member.isfile():
                if member.mode & stat.S_ISUID:
                    raise ValueError("The archive %s contains a setuid file: %s" % \
                        (self.filename, name))
                elif member.mode & stat.S_ISGID:
                    raise ValueError("The archive %s contains a setgid file: %s" % \
                            (self.filename, name))
            elif member.issym():
                linkname = member.linkname
                if os.path.normpath(linkname).startswith('..'):
                    raise ValueError('The archive %s contains a symlink pointing ' \
                            'outside of the archive via a path traversal: %s -> %s' % \
                            (self.filename, name, linkname))
                if os.path.isabs(linkname):
                    raise ValueError('The archive %s contains a symlink pointing ' \
                            'outside of the archive: %s -> %s' % \
                            (self.filename, name, linkname))
            elif member.isdev():
                raise ValueError("The archive %s contains a non-regular " \
                        "file: %s" % (self.filename, name))
            elif member.islnk():
                raise ValueError("The archive %s contains a hardlink: %s" \
                        % (self.filename, name))

    @staticmethod
    def _clean_member(member: ArchiveMember) -> ArchiveMember:
        """Reset the modification time and the ownership of the member."""
        assert isinstance(member, tarfile.TarInfo)  # please mypy
        member.mtime = member.uid = member.gid = 0
        member.uname = member.gname = ''
        return member

    @staticmethod
    def _get_member_meta(member: ArchiveMember) -> Dict[str, str]:
        """Return the fields of the member that `_clean_member` would reset."""
        assert isinstance(member, tarfile.TarInfo)  # please mypy
        metadata = {}
        if member.mtime != 0:
            metadata['mtime'] = str(datetime.datetime.fromtimestamp(member.mtime))
        if member.uid != 0:
            metadata['uid'] = str(member.uid)
        if member.gid != 0:
            metadata['gid'] = str(member.gid)
        if member.uname != '':
            metadata['uname'] = member.uname
        if member.gname != '':
            metadata['gname'] = member.gname
        return metadata

    def _add_file_to_archive(self, archive: ArchiveClass, member: ArchiveMember,
                             full_path: str):
        """Add the file at full_path under the member's name.

        Only the name of `member` is used; the added entry is cleaned
        via the `_clean_member` filter.
        """
        assert isinstance(member, tarfile.TarInfo)  # please mypy
        assert isinstance(archive, tarfile.TarFile)  # please mypy
        archive.add(full_path, member.name, filter=TarParser._clean_member)  # type: ignore

    @staticmethod
    def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]:
        """Return every member of the archive."""
        assert isinstance(archive, tarfile.TarFile)  # please mypy
        return archive.getmembers()  # type: ignore

    @staticmethod
    def _get_member_name(member: ArchiveMember) -> str:
        """Return the name of the member inside the archive."""
        assert isinstance(member, tarfile.TarInfo)  # please mypy
        return member.name

    @staticmethod
    def _set_member_permissions(member: ArchiveMember, permissions: int) -> ArchiveMember:
        """Store `permissions` as the mode of the member."""
        assert isinstance(member, tarfile.TarInfo)  # please mypy
        member.mode = permissions
        return member

    @staticmethod
    def _is_dir(member: ArchiveMember) -> bool:
        """Return true if the given member is a directory."""
        assert isinstance(member, tarfile.TarInfo)  # please mypy
        return member.isdir()
class TarGzParser(TarParser):
    """Tar parser for gzip-compressed archives."""
    # Appended to the 'w' mode when the cleaned archive is opened for
    # writing in `remove_all`.
    compression = ':gz'
    mimetypes = {'application/x-tar+gz'}
class TarBz2Parser(TarParser):
    """Tar parser for bzip2-compressed archives."""
    # Appended to the 'w' mode when the cleaned archive is opened for
    # writing in `remove_all`.
    compression = ':bz2'
    mimetypes = {'application/x-tar+bz2'}
class TarXzParser(TarParser):
    """Tar parser for xz-compressed archives."""
    # Appended to the 'w' mode when the cleaned archive is opened for
    # writing in `remove_all`.
    compression = ':xz'
    mimetypes = {'application/x-tar+xz'}
class ZipParser(ArchiveBasedAbstractParser):
    """Parser for zip archives, on top of the stdlib `zipfile` module."""
    mimetypes = {'application/zip'}

    def __init__(self, filename: str):
        super().__init__(filename)
        self.archive_class = zipfile.ZipFile
        self.member_class = zipfile.ZipInfo

    def is_archive_valid(self):
        """Raise ValueError if the file can't be opened as a zip archive."""
        try:
            with zipfile.ZipFile(self.filename):
                pass
        except (zipfile.BadZipFile, OSError) as e:
            raise ValueError from e

    @staticmethod
    def _clean_member(member: ArchiveMember) -> ArchiveMember:
        """Reset the creating system, the comment and the date of the member."""
        assert isinstance(member, zipfile.ZipInfo)  # please mypy
        member.create_system = 3  # Linux
        member.comment = b''
        member.date_time = (1980, 1, 1, 0, 0, 0)  # this is as early as a zipfile can be
        return member

    @staticmethod
    def _get_member_meta(member: ArchiveMember) -> Dict[str, str]:
        """Return the fields of the member that `_clean_member` would reset."""
        assert isinstance(member, zipfile.ZipInfo)  # please mypy
        metadata = {}
        # Host system codes, as defined by the zip specification:
        # 0 is MS-DOS/FAT (what Windows tools write), 2 is OpenVMS, 3 is Unix.
        if member.create_system == 3:  # this is Linux
            pass
        elif member.create_system == 0:
            metadata['create_system'] = 'Windows'
        else:
            metadata['create_system'] = 'Weird'

        if member.comment:
            metadata['comment'] = member.comment  # type: ignore

        if member.date_time != (1980, 1, 1, 0, 0, 0):
            metadata['date_time'] = str(datetime.datetime(*member.date_time))

        return metadata

    def _add_file_to_archive(self, archive: ArchiveClass, member: ArchiveMember,
                             full_path: str):
        """Write the content of full_path to the archive, as `member`."""
        assert isinstance(archive, zipfile.ZipFile)  # please mypy
        assert isinstance(member, zipfile.ZipInfo)  # please mypy
        with open(full_path, 'rb') as f:
            archive.writestr(member, f.read(),
                             compress_type=member.compress_type)

    @staticmethod
    def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]:
        """Return every member of the archive."""
        assert isinstance(archive, zipfile.ZipFile)  # please mypy
        return archive.infolist()  # type: ignore

    @staticmethod
    def _get_member_name(member: ArchiveMember) -> str:
        """Return the name of the member inside the archive."""
        assert isinstance(member, zipfile.ZipInfo)  # please mypy
        return member.filename

    @staticmethod
    def _get_member_compression(member: ArchiveMember):
        """Return the compression type of the member."""
        assert isinstance(member, zipfile.ZipInfo)  # please mypy
        return member.compress_type

    @staticmethod
    def _set_member_compression(member: ArchiveMember, compression) -> ArchiveMember:
        """Store `compression` as the compression type of the member."""
        assert isinstance(member, zipfile.ZipInfo)  # please mypy
        member.compress_type = compression
        return member

    @staticmethod
    def _is_dir(member: ArchiveMember) -> bool:
        """Return true if the given member is a directory."""
        assert isinstance(member, zipfile.ZipInfo)  # please mypy
        return member.is_dir()

View File

@@ -1,39 +1,54 @@
import mimetypes
import os
import shutil
import tempfile
from typing import Union, Dict
import mutagen
from . import abstract
from . import abstract, parser_factory, video
class MutagenParser(abstract.AbstractParser):
def __init__(self, filename):
super().__init__(filename)
try:
mutagen.File(self.filename)
if mutagen.File(self.filename) is None:
raise ValueError
except mutagen.MutagenError:
raise ValueError
def get_meta(self):
def get_meta(self) -> Dict[str, Union[str, Dict]]:
f = mutagen.File(self.filename)
if f.tags:
return {k:', '.join(v) for k, v in f.tags.items()}
return {k: ', '.join(map(str, v)) for k, v in f.tags.items()}
return {}
def remove_all(self):
def remove_all(self) -> bool:
shutil.copy(self.filename, self.output_filename)
f = mutagen.File(self.output_filename)
f.delete()
f.save()
try:
f.delete()
f.save()
except mutagen.MutagenError:
raise ValueError
return True
class MP3Parser(MutagenParser):
mimetypes = {'audio/mpeg', }
def get_meta(self):
metadata = {}
def get_meta(self) -> Dict[str, Union[str, Dict]]:
metadata: Dict[str, Union[str, Dict]] = dict()
meta = mutagen.File(self.filename).tags
if not meta:
return metadata
for key in meta:
if isinstance(key, tuple):
metadata[key[0]] = key[1]
continue
if not hasattr(meta[key], 'text'): # pragma: no cover
continue
metadata[key.rstrip(' \t\r\n\0')] = ', '.join(map(str, meta[key].text))
return metadata
@@ -44,3 +59,56 @@ class OGGParser(MutagenParser):
class FLACParser(MutagenParser):
    """Parser for FLAC files, which can embed pictures next to their tags."""
    mimetypes = {'audio/flac', 'audio/x-flac'}

    def remove_all(self) -> bool:
        """Write a copy of the file without pictures nor tags.

        Returns:
            Always True.
        """
        shutil.copy(self.filename, self.output_filename)
        f = mutagen.File(self.output_filename)
        f.clear_pictures()
        f.delete()
        f.save(deleteid3=True)
        return True

    def get_meta(self) -> Dict[str, Union[str, Dict]]:
        """Return the tags of the file, plus the metadata of its pictures.

        Each embedded picture is written to a temporary file and handed
        to the parser matching its type; the result is stored under the
        description of the picture, or `Cover <index>` if it has none.

        Raises:
            ValueError: if no parser is available for a picture.
        """
        meta = super().get_meta()
        for num, picture in enumerate(mutagen.File(self.filename).pictures):
            name = picture.desc if picture.desc else 'Cover %d' % num
            extension = mimetypes.guess_extension(picture.mime)
            if extension is None:  # pragma: no cover
                meta[name] = 'harmful data'
                continue

            # Create the file directly with its extension, and reuse the
            # descriptor returned by mkstemp, so that neither a stray
            # extension-less file nor an open descriptor is left behind.
            fd, fname = tempfile.mkstemp(suffix=extension)
            try:
                with os.fdopen(fd, 'wb') as f:
                    f.write(picture.data)
                p, _ = parser_factory.get_parser(fname)  # type: ignore
                if p is None:
                    raise ValueError
                p.sandbox = self.sandbox
                meta[name] = p.get_meta()
            finally:
                os.remove(fname)
        return meta
class WAVParser(video.AbstractFFmpegParser):
    """Parser for WAV audio files, built on video.AbstractFFmpegParser."""
    mimetypes = {'audio/x-wav', }
    # NOTE(review): presumably the exiftool keys that are not reported as
    # metadata for this format; confirm against the base class.
    meta_allowlist = {'AvgBytesPerSec', 'BitsPerSample', 'Directory',
                      'Duration', 'Encoding', 'ExifToolVersion',
                      'FileAccessDate', 'FileInodeChangeDate',
                      'FileModifyDate', 'FileName', 'FilePermissions',
                      'FileSize', 'FileType', 'FileTypeExtension',
                      'MIMEType', 'NumChannels', 'SampleRate', 'SourceFile',
                     }
class AIFFParser(video.AbstractFFmpegParser):
    """Parser for AIFF audio files, built on video.AbstractFFmpegParser."""
    mimetypes = {'audio/aiff', 'audio/x-aiff'}
    # NOTE(review): presumably the exiftool keys that are not reported as
    # metadata for this format; confirm against the base class.
    meta_allowlist = {'AvgBytesPerSec', 'BitsPerSample', 'Directory',
                      'Duration', 'Encoding', 'ExifToolVersion',
                      'FileAccessDate', 'FileInodeChangeDate',
                      'FileModifyDate', 'FileName', 'FilePermissions',
                      'FileSize', 'FileType', 'FileTypeExtension',
                      'MIMEType', 'NumChannels', 'SampleRate', 'SourceFile',
                      'NumSampleFrames', 'SampleSize',
                     }

113
libmat2/bubblewrap.py Normal file
View File

@@ -0,0 +1,113 @@
"""
Wrapper around a subset of the subprocess module,
that uses bwrap (bubblewrap) when it is available.
Instead of importing subprocess, other modules should use this as follows:
from . import subprocess
"""
import os
import shutil
import subprocess
import tempfile
import functools
from typing import Optional, List
__all__ = ['PIPE', 'run', 'CalledProcessError']
PIPE = subprocess.PIPE
CalledProcessError = subprocess.CalledProcessError
# pylint: disable=subprocess-run-check
@functools.lru_cache(maxsize=None)
def _get_bwrap_path() -> str:
which_path = shutil.which('bwrap')
if which_path:
return which_path
raise RuntimeError("Unable to find bwrap") # pragma: no cover
def _get_bwrap_args(tempdir: str,
input_filename: str,
output_filename: Optional[str] = None) -> List[str]:
ro_bind_args = []
cwd = os.getcwd()
# XXX: use --ro-bind-try once all supported platforms
# have a bubblewrap recent enough to support it.
ro_bind_dirs = ['/usr', '/lib', '/lib64', '/bin', '/sbin', '/etc/alternatives', cwd]
for bind_dir in ro_bind_dirs:
if os.path.isdir(bind_dir): # pragma: no cover
ro_bind_args.extend(['--ro-bind', bind_dir, bind_dir])
ro_bind_files = ['/etc/ld.so.cache']
for bind_file in ro_bind_files:
if os.path.isfile(bind_file): # pragma: no cover
ro_bind_args.extend(['--ro-bind', bind_file, bind_file])
args = ro_bind_args + \
['--dev', '/dev',
'--proc', '/proc',
'--chdir', cwd,
'--unshare-user-try',
'--unshare-ipc',
'--unshare-pid',
'--unshare-net',
'--unshare-uts',
'--unshare-cgroup-try',
'--new-session',
'--cap-drop', 'all',
# XXX: enable --die-with-parent once all supported platforms have
# a bubblewrap recent enough to support it.
# '--die-with-parent',
]
if output_filename:
# Mount an empty temporary directory where the sandboxed
# process will create its output file
output_dirname = os.path.dirname(os.path.abspath(output_filename))
args.extend(['--bind', tempdir, output_dirname])
absolute_input_filename = os.path.abspath(input_filename)
args.extend(['--ro-bind', absolute_input_filename, absolute_input_filename])
return args
def run(args: List[str],
        input_filename: str,
        output_filename: Optional[str] = None,
        **kwargs) -> subprocess.CompletedProcess:
    """Run `args` through `subprocess.run`, inside bwrap (bubblewrap) when
    it is installed, and directly otherwise.

    Extra supported keyword arguments:

    - `input_filename`, made available read-only in the sandbox
    - `output_filename`, where the file created by the sandboxed process
      is copied upon successful completion; an empty temporary directory
      is made visible as the parent directory of this file in the sandbox.
      Optional: one valid use case is to invoke an external process
      to inspect metadata present in a file.

    Every other keyword argument is forwarded to `subprocess.run`.
    """
    try:
        bwrap = _get_bwrap_path()
    except RuntimeError:  # pragma: no cover
        # bubblewrap is not installed ⇒ short-circuit
        return subprocess.run(args, **kwargs)

    with tempfile.TemporaryDirectory() as scratch_dir:
        sandboxed_cmd = [bwrap]
        sandboxed_cmd.extend(_get_bwrap_args(tempdir=scratch_dir,
                                             input_filename=input_filename,
                                             output_filename=output_filename))
        sandboxed_cmd.extend(args)
        result = subprocess.run(sandboxed_cmd, **kwargs)
        if result.returncode == 0 and output_filename:
            # The process wrote into the scratch directory: move the
            # result to its final location before the directory vanishes.
            produced = os.path.join(scratch_dir, os.path.basename(output_filename))
            shutil.copy(produced, output_filename)

    return result

115
libmat2/epub.py Normal file
View File

@@ -0,0 +1,115 @@
import logging
import re
import uuid
import zipfile
import xml.etree.ElementTree as ET # type: ignore
from typing import Any, Dict
from . import archive, office
class EPUBParser(archive.ZipParser):
    """Parser for EPUB files, which are zip archives of XML documents."""
    mimetypes = {'application/epub+zip', }
    metadata_namespace = '{http://purl.org/dc/elements/1.1/}'

    def __init__(self, filename):
        super().__init__(filename)
        # Dots are escaped so that they only match a literal dot, and the
        # OPS pattern is anchored like the one used in `_specific_cleanup`:
        # otherwise a member like `OPS/a_xml.jpg` would be kept uncleaned.
        self.files_to_keep = set(map(re.compile, {  # type: ignore
            r'META-INF/container\.xml',
            r'mimetype',
            r'OEBPS/content\.opf',
            r'content\.opf',
            r'hmh\.opf',
            r'OPS/.+\.xml$'
        }))
        self.files_to_omit = set(map(re.compile, {  # type: ignore
            r'iTunesMetadata\.plist',
            r'META-INF/calibre_bookmarks\.txt',
            r'OEBPS/package\.opf',
        }))
        # Identifier written in place of the original one of the book.
        self.uniqid = uuid.uuid4()

    def is_archive_valid(self):
        """Raise ValueError on invalid zip files and on encrypted content."""
        super().is_archive_valid()
        with zipfile.ZipFile(self.filename) as zin:
            for item in self._get_all_members(zin):
                member_name = self._get_member_name(item)
                if member_name.endswith('META-INF/encryption.xml'):
                    raise ValueError('the file contains encrypted fonts')

    def _specific_get_meta(self, full_path, file_path) -> Dict[str, Any]:
        """Return the meta/dc/cp elements found in `.opf` members.

        Members with another extension yield an empty dict; members that
        can't be decoded are reported as harmful content.
        """
        if not file_path.endswith('.opf'):
            return {}

        with open(full_path, encoding='utf-8') as f:
            try:
                results = re.findall(r"<((?:meta|dc|cp).+?)[^>]*>(.+)</\1>",
                                     f.read(), re.I|re.M)
                return dict(results)
            except (TypeError, UnicodeDecodeError):
                return {file_path: 'harmful content', }

    def _specific_cleanup(self, full_path: str) -> bool:
        """Dispatch the extracted member to its dedicated cleaner, if any.

        Returns:
            False if the member couldn't be parsed, True otherwise.
        """
        if full_path.endswith('hmh.opf') or full_path.endswith('content.opf'):
            return self.__handle_contentopf(full_path)
        elif full_path.endswith('OEBPS/toc.ncx'):
            return self.__handle_tocncx(full_path)
        elif re.search(r'/OPS/[^/]+\.xml$', full_path):
            return self.__handle_ops_xml(full_path)
        return True

    def __handle_ops_xml(self, full_path: str) -> bool:
        """Empty the first `head` element of the document, in place."""
        try:
            tree, namespace = office._parse_xml(full_path)
        except ET.ParseError:  # pragma: nocover
            logging.error("Unable to parse %s in %s.", full_path, self.filename)
            return False

        for item in tree.iterfind('.//', namespace):  # pragma: nocover
            if item.tag.strip().lower().endswith('head'):
                item.clear()
                break
        tree.write(full_path, xml_declaration=True, encoding='utf-8',
                   short_empty_elements=False)
        return True

    def __handle_tocncx(self, full_path: str) -> bool:
        """Replace the content of the first `head` element by an empty
        `meta` element, in place."""
        try:
            tree, namespace = office._parse_xml(full_path)
        except ET.ParseError:  # pragma: nocover
            logging.error("Unable to parse %s in %s.", full_path, self.filename)
            return False

        for item in tree.iterfind('.//', namespace):  # pragma: nocover
            if item.tag.strip().lower().endswith('head'):
                item.clear()
                ET.SubElement(item, 'meta', attrib={'name': '', 'content': ''})
                break
        tree.write(full_path, xml_declaration=True, encoding='utf-8',
                   short_empty_elements=False)
        return True

    def __handle_contentopf(self, full_path: str) -> bool:
        """Replace the content of the `metadata` element by a fresh
        identifier and empty `language` and `title` elements, in place."""
        try:
            tree, namespace = office._parse_xml(full_path)
        except ET.ParseError:
            logging.error("Unable to parse %s in %s.", full_path, self.filename)
            return False

        for item in tree.iterfind('.//', namespace):  # pragma: nocover
            if item.tag.strip().lower().endswith('metadata'):
                item.clear()

                # item with mandatory content
                uniqid = ET.Element(self.metadata_namespace + 'identifier')
                uniqid.text = str(self.uniqid)
                uniqid.set('id', 'id')
                item.append(uniqid)

                # items without mandatory content
                for name in ['language', 'title']:
                    item.append(ET.Element(self.metadata_namespace + name))
                break  # there is only a single <metadata> block
        tree.write(full_path, xml_declaration=True, encoding='utf-8')
        return True

80
libmat2/exiftool.py Normal file
View File

@@ -0,0 +1,80 @@
import functools
import json
import logging
import os
import shutil
import subprocess
from typing import Union, Set, Dict
from . import abstract
from . import bubblewrap
class ExiftoolParser(abstract.AbstractParser):
    """ Exiftool is often the easiest way to get all the metadata
    from a file, hence why several parsers are re-using its `get_meta`
    method.
    """
    # Keys of the exiftool output that `get_meta` doesn't report.
    meta_allowlist: Set[str] = set()

    def get_meta(self) -> Dict[str, Union[str, Dict]]:
        """Return the exiftool output for the file, minus `meta_allowlist`.

        exiftool is run inside bubblewrap when `self.sandbox` is set.

        Raises:
            ValueError: if exiftool exits with an error.
        """
        cmd = [_get_exiftool_path(), '-json', self.filename]
        try:
            if self.sandbox:
                out = bubblewrap.run(cmd,
                                     input_filename=self.filename,
                                     check=True, stdout=subprocess.PIPE).stdout
            else:
                out = subprocess.run(cmd,
                                     check=True, stdout=subprocess.PIPE).stdout
        except subprocess.CalledProcessError as e:  # pragma: no cover
            raise ValueError from e
        # exiftool outputs a list with one entry per processed file.
        meta = json.loads(out.decode('utf-8'))[0]
        for key in self.meta_allowlist:
            meta.pop(key, None)
        return meta

    def _lightweight_cleanup(self) -> bool:
        """Let exiftool write a metadata-free copy to `self.output_filename`.

        Returns:
            False if an existing output file can't be removed or if
            exiftool fails, True otherwise.
        """
        if os.path.exists(self.output_filename):
            try:  # exiftool can't force output to existing files
                os.remove(self.output_filename)
            except OSError as e:  # pragma: no cover
                # Report the output file, since it is the one that can't
                # be overwritten.
                logging.error("The output file %s is already existing and "
                              "can't be overwritten: %s.", self.output_filename, e)
                return False

        # Note: '-All=' must be followed by a known exiftool option.
        # Also, '-CommonIFD0' is needed for .tiff files
        cmd = [_get_exiftool_path(),
               '-all=',         # remove metadata
               '-adobe=',       # remove adobe-specific metadata
               '-exif:all=',    # remove all exif metadata
               '-Time:All=',    # remove all timestamps
               '-quiet',        # don't show useless logs
               '-CommonIFD0=',  # remove IFD0 metadata
               '-o', self.output_filename,
               self.filename]
        try:
            if self.sandbox:
                bubblewrap.run(cmd, check=True,
                               input_filename=self.filename,
                               output_filename=self.output_filename)
            else:
                subprocess.run(cmd, check=True)
        except subprocess.CalledProcessError as e:  # pragma: no cover
            logging.error("Something went wrong during the processing of %s: %s", self.filename, e)
            return False
        return True
@functools.lru_cache(maxsize=None)
def _get_exiftool_path() -> str: # pragma: no cover
which_path = shutil.which('exiftool')
if which_path:
return which_path
# Exiftool on Arch Linux has a weird path
if os.access('/usr/bin/vendor_perl/exiftool', os.X_OK):
return '/usr/bin/vendor_perl/exiftool'
raise RuntimeError("Unable to find exiftool")

View File

@@ -1,13 +1,13 @@
import shutil
from typing import Dict
from typing import Union, Dict
from . import abstract
class HarmlessParser(abstract.AbstractParser):
""" This is the parser for filetypes that do not contain metadata. """
mimetypes = {'text/plain', 'image/x-ms-bmp'}
""" This is the parser for filetypes that can not contain metadata. """
mimetypes = {'text/plain', 'image/x-ms-bmp', 'image/bmp'}
def get_meta(self) -> Dict[str, str]:
def get_meta(self) -> Dict[str, Union[str, Dict]]:
return dict()
def remove_all(self) -> bool:

View File

@@ -1,50 +1,63 @@
import subprocess
import imghdr
import json
import os
import shutil
import tempfile
import re
from typing import Union, Any, Dict
import cairo
import gi
gi.require_version('GdkPixbuf', '2.0')
from gi.repository import GdkPixbuf
gi.require_version('Rsvg', '2.0')
from gi.repository import GdkPixbuf, GLib, Rsvg
from . import abstract
from . import exiftool, abstract
class SVGParser(exiftool.ExiftoolParser):
mimetypes = {'image/svg+xml', }
meta_allowlist = {'Directory', 'ExifToolVersion', 'FileAccessDate',
'FileInodeChangeDate', 'FileModifyDate', 'FileName',
'FilePermissions', 'FileSize', 'FileType',
'FileTypeExtension', 'ImageHeight', 'ImageWidth',
'MIMEType', 'SVGVersion', 'SourceFile', 'ViewBox'
}
class _ImageParser(abstract.AbstractParser):
@staticmethod
def __handle_problematic_filename(filename: str, callback) -> str:
""" This method takes a filename with a problematic name,
and safely applies it a `callback`."""
tmpdirname = tempfile.mkdtemp()
fname = os.path.join(tmpdirname, "temp_file")
shutil.copy(filename, fname)
out = callback(fname)
shutil.rmtree(tmpdirname)
return out
def remove_all(self) -> bool:
try:
svg = Rsvg.Handle.new_from_file(self.filename)
except GLib.GError:
raise ValueError
def get_meta(self):
""" There is no way to escape the leading(s) dash(es) of the current
self.filename to prevent parameter injections, so we need to take care
of this.
"""
fun = lambda f: subprocess.check_output(['/usr/bin/exiftool', '-json', f])
if re.search('^[a-z0-9/]', self.filename) is None:
out = self.__handle_problematic_filename(self.filename, fun)
else:
out = fun(self.filename)
meta = json.loads(out.decode('utf-8'))[0]
for key in self.meta_whitelist:
meta.pop(key, None)
try:
_, _, _, _, has_viewbox, viewbox = svg.get_intrinsic_dimensions()
if has_viewbox is False:
raise ValueError
_, width, height = svg.get_intrinsic_size_in_pixels()
except AttributeError:
dimensions = svg.get_dimensions()
height, width = dimensions.height, dimensions.width
surface = cairo.SVGSurface(self.output_filename, height, width)
context = cairo.Context(surface)
try:
svg.render_document(context, viewbox)
except AttributeError:
svg.render_cairo(context)
surface.finish()
return True
def get_meta(self) -> Dict[str, Union[str, Dict]]:
meta = super().get_meta()
# The namespace is mandatory, but only the …/2000/svg is valid.
ns = 'http://www.w3.org/2000/svg'
if meta.get('Xmlns') == ns:
meta.pop('Xmlns')
return meta
class PNGParser(_ImageParser):
class PNGParser(exiftool.ExiftoolParser):
mimetypes = {'image/png', }
meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName',
'Directory', 'FileSize', 'FileModifyDate',
'FileAccessDate', 'FileInodeChangeDate',
'FilePermissions', 'FileType', 'FileTypeExtension',
@@ -54,54 +67,85 @@ class PNGParser(_ImageParser):
def __init__(self, filename):
super().__init__(filename)
try: # better fail here than later
cairo.ImageSurface.create_from_png(self.filename)
except MemoryError:
except: # pragma: no cover
# Cairo is returning some weird exceptions :/
raise ValueError
def remove_all(self):
def remove_all(self) -> bool:
if self.lightweight_cleaning:
return self._lightweight_cleanup()
surface = cairo.ImageSurface.create_from_png(self.filename)
surface.write_to_png(self.output_filename)
return True
class GdkPixbufAbstractParser(_ImageParser):
class GIFParser(exiftool.ExiftoolParser):
    """Parser for GIF images, cleaned by exiftool only."""
    mimetypes = {'image/gif'}
    # Keys of the exiftool output that `get_meta` doesn't report.
    meta_allowlist = {'AnimationIterations', 'BackgroundColor', 'BitsPerPixel',
                      'ColorResolutionDepth', 'Directory', 'Duration',
                      'ExifToolVersion', 'FileAccessDate',
                      'FileInodeChangeDate', 'FileModifyDate', 'FileName',
                      'FilePermissions', 'FileSize', 'FileType',
                      'FileTypeExtension', 'FrameCount', 'GIFVersion',
                      'HasColorMap', 'ImageHeight', 'ImageSize', 'ImageWidth',
                      'MIMEType', 'Megapixels', 'SourceFile',}

    def remove_all(self) -> bool:
        """Clean the file with `_lightweight_cleanup`, and return its result."""
        return self._lightweight_cleanup()
class GdkPixbufAbstractParser(exiftool.ExiftoolParser):
""" GdkPixbuf can handle a lot of surfaces, so we're rending images on it,
this has the side-effect of removing metadata completely.
this has the side-effect of completely removing metadata.
"""
_type = ''
def remove_all(self):
_, extension = os.path.splitext(self.filename)
pixbuf = GdkPixbuf.Pixbuf.new_from_file(self.filename)
if extension == '.jpg':
extension = '.jpeg' # gdk is picky
pixbuf.savev(self.output_filename, extension[1:], [], [])
return True
def __init__(self, filename):
super().__init__(filename)
if imghdr.what(filename) != self._type: # better safe than sorry
try:
GdkPixbuf.Pixbuf.new_from_file(self.filename)
except GLib.GError:
raise ValueError
def remove_all(self) -> bool:
if self.lightweight_cleaning:
return self._lightweight_cleanup()
_, extension = os.path.splitext(self.filename)
pixbuf = GdkPixbuf.Pixbuf.new_from_file(self.filename)
pixbuf = GdkPixbuf.Pixbuf.apply_embedded_orientation(pixbuf)
if extension.lower() == '.jpg':
extension = '.jpeg' # gdk is picky
elif extension.lower() == '.tif':
extension = '.tiff' # gdk is picky
try:
pixbuf.savev(self.output_filename, type=extension[1:],
option_keys=[], option_values=[])
except GLib.GError: # pragma: no cover
return False
return True
class JPGParser(GdkPixbufAbstractParser):
_type = 'jpeg'
mimetypes = {'image/jpeg'}
meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName',
'Directory', 'FileSize', 'FileModifyDate',
'FileAccessDate', "FileInodeChangeDate",
'FilePermissions', 'FileType', 'FileTypeExtension',
'MIMEType', 'ImageWidth', 'ImageSize', 'BitsPerSample',
'ColorComponents', 'EncodingProcess', 'JFIFVersion',
'ResolutionUnit', 'XResolution', 'YCbCrSubSampling',
'YResolution', 'Megapixels', 'ImageHeight'}
'YResolution', 'Megapixels', 'ImageHeight', 'Orientation'}
class TiffParser(GdkPixbufAbstractParser):
_type = 'tiff'
mimetypes = {'image/tiff'}
meta_whitelist = {'Compression', 'ExifByteOrder', 'ExtraSamples',
meta_allowlist = {'Compression', 'ExifByteOrder', 'ExtraSamples',
'FillOrder', 'PhotometricInterpretation',
'PlanarConfiguration', 'RowsPerStrip', 'SamplesPerPixel',
'StripByteCounts', 'StripOffsets', 'BitsPerSample',
@@ -109,4 +153,58 @@ class TiffParser(GdkPixbufAbstractParser):
'FileInodeChangeDate', 'FileModifyDate', 'FileName',
'FilePermissions', 'FileSize', 'FileType',
'FileTypeExtension', 'ImageHeight', 'ImageSize',
'ImageWidth', 'MIMEType', 'Megapixels', 'SourceFile'}
'ImageWidth', 'MIMEType', 'Megapixels', 'SourceFile', 'Orientation'}
class PPMParser(abstract.AbstractParser):
    """Parser for PPM images, where metadata can hide in `#` comments."""
    mimetypes = {'image/x-portable-pixmap'}

    def get_meta(self) -> Dict[str, Union[str, Dict]]:
        """Return the comment lines of the file, keyed by their line index."""
        meta: Dict[str, Union[str, Dict[Any, Any]]] = dict()
        with open(self.filename) as f:
            for idx, line in enumerate(f):
                if line.lstrip().startswith('#'):
                    meta[str(idx)] = line.strip()
        return meta

    def remove_all(self) -> bool:
        """Write a copy of the file without its comment lines.

        Runs of whitespace are collapsed to a single space and every kept
        line ends with a newline, so values stay separated from each
        other: removing the whitespace altogether would merge them
        (`3 2` becoming `32`) and corrupt the image.

        Returns:
            Always True.
        """
        # NOTE(review): the file is read as text, like in `get_meta`;
        # confirm whether binary PPM variants have to be supported here.
        with open(self.filename) as fin, open(self.output_filename, 'w') as fout:
            for line in fin:
                if line.lstrip().startswith('#'):
                    continue
                tokens = line.split()
                if tokens:  # skip lines made of whitespace only
                    fout.write(' '.join(tokens) + '\n')
        return True
class HEICParser(exiftool.ExiftoolParser):
    """Parser for HEIC images, cleaned by exiftool only."""
    mimetypes = {'image/heic'}
    # Keys of the exiftool output that `get_meta` doesn't report.
    meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',
                      'FileSize', 'FileModifyDate', 'FileAccessDate',
                      'FileInodeChangeDate', 'FilePermissions', 'FileType',
                      'FileTypeExtension', 'MIMEType', 'MajorBrand', 'MinorVersion',
                      'CompatibleBrands','HandlerType', 'PrimaryItemReference',
                      'HEVCConfigurationVersion', 'GeneralProfileSpace',
                      'GeneralTierFlag', 'GeneralProfileIDC',
                      'GenProfileCompatibilityFlags', 'ConstraintIndicatorFlags',
                      'GeneralLevelIDC', 'MinSpatialSegmentationIDC',
                      'ParallelismType','ChromaFormat', 'BitDepthLuma', 'BitDepthChroma',
                      'NumTemporalLayers', 'TemporalIDNested', 'ImageWidth',
                      'ImageHeight', 'ImageSpatialExtent', 'ImagePixelDepth',
                      'AverageFrameRate', 'ConstantFrameRate', 'MediaDataSize',
                      'MediaDataOffset','ImageSize', 'Megapixels'}

    def remove_all(self) -> bool:
        """Clean the file with `_lightweight_cleanup`, and return its result."""
        return self._lightweight_cleanup()
class WEBPParser(GdkPixbufAbstractParser):
    """Parser for WebP images, built on GdkPixbufAbstractParser."""
    mimetypes = {'image/webp'}
    # Keys of the exiftool output that `get_meta` doesn't report.
    meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName',
                      'Directory', 'FileSize', 'FileModifyDate',
                      'FileAccessDate', "FileInodeChangeDate",
                      'FilePermissions', 'FileType', 'FileTypeExtension',
                      'MIMEType', 'ImageWidth', 'ImageSize', 'BitsPerSample',
                      'ColorComponents', 'EncodingProcess', 'JFIFVersion',
                      'ResolutionUnit', 'XResolution', 'YCbCrSubSampling',
                      'YResolution', 'Megapixels', 'ImageHeight', 'Orientation',
                      'HorizontalScale', 'VerticalScale', 'VP8Version'}

View File

@@ -1,209 +1,554 @@
import random
import uuid
import logging
import os
import re
import shutil
import tempfile
import datetime
import zipfile
import xml.etree.ElementTree as ET
from typing import Dict, Set, Pattern
from typing import Pattern, Any, Tuple, Dict
import xml.etree.ElementTree as ET # type: ignore
from .archive import ZipParser
# pylint: disable=line-too-long
from . import abstract, parser_factory
def _parse_xml(full_path: str) -> Tuple[ET.ElementTree, Dict[str, str]]:
""" This function parses XML, with namespace support. """
namespace_map = dict()
for _, (key, value) in ET.iterparse(full_path, ("start-ns", )):
# The ns[0-9]+ namespaces are reserved for internal usage, so
# we have to use an other nomenclature.
if re.match('^ns[0-9]+$', key, re.I): # pragma: no cover
key = 'mat' + key[2:]
# Make pyflakes happy
assert Set
assert Pattern
namespace_map[key] = value
ET.register_namespace(key, value)
def _parse_xml(full_path: str):
""" This function parse XML with namespace support. """
def parse_map(f): # etree support for ns is a bit rough
ns_map = dict()
for event, (k, v) in ET.iterparse(f, ("start-ns", )):
if event == "start-ns":
ns_map[k] = v
return ns_map
ns = parse_map(full_path)
# Register the namespaces
for k, v in ns.items():
ET.register_namespace(k, v)
return ET.parse(full_path), ns
return ET.parse(full_path), namespace_map
class ArchiveBasedAbstractParser(abstract.AbstractParser):
# Those are the files that have a format that _isn't_
# supported by MAT2, but that we want to keep anyway.
files_to_keep = set() # type: Set[str]
def _sort_xml_attributes(full_path: str) -> bool:
""" Sort xml attributes lexicographically,
because it's possible to fingerprint producers (MS Office, Libreoffice, …)
since they are all using different orders.
"""
tree = ET.parse(full_path)
# Those are the files that we _do not_ want to keep,
# no matter if they are supported or not.
files_to_omit = set() # type: Set[Pattern]
for c in tree.getroot():
c[:] = sorted(c, key=lambda child: (child.tag, child.get('desc')))
def __init__(self, filename):
super().__init__(filename)
try: # better fail here than later
zipfile.ZipFile(self.filename)
except zipfile.BadZipFile:
raise ValueError
def _specific_cleanup(self, full_path: str) -> bool:
""" This method can be used to apply specific treatment
to files present in the archive."""
return True
def _clean_zipinfo(self, zipinfo: zipfile.ZipInfo) -> zipfile.ZipInfo:
zipinfo.create_system = 3 # Linux
zipinfo.comment = b''
zipinfo.date_time = (1980, 1, 1, 0, 0, 0)
return zipinfo
def _get_zipinfo_meta(self, zipinfo: zipfile.ZipInfo) -> Dict[str, str]:
metadata = {}
if zipinfo.create_system == 3:
#metadata['create_system'] = 'Linux'
pass
elif zipinfo.create_system == 2:
metadata['create_system'] = 'Windows'
else:
metadata['create_system'] = 'Weird'
if zipinfo.comment:
metadata['comment'] = zipinfo.comment # type: ignore
if zipinfo.date_time != (1980, 1, 1, 0, 0, 0):
metadata['date_time'] = str(datetime.datetime(*zipinfo.date_time))
return metadata
def remove_all(self) -> bool:
with zipfile.ZipFile(self.filename) as zin,\
zipfile.ZipFile(self.output_filename, 'w') as zout:
temp_folder = tempfile.mkdtemp()
for item in zin.infolist():
if item.filename[-1] == '/': # `is_dir` is added in Python3.6
continue # don't keep empty folders
zin.extract(member=item, path=temp_folder)
full_path = os.path.join(temp_folder, item.filename)
if self._specific_cleanup(full_path) is False:
shutil.rmtree(temp_folder)
os.remove(self.output_filename)
print("Something went wrong during deep cleaning of %s" % item.filename)
return False
if item.filename in self.files_to_keep:
# those files aren't supported, but we want to add them anyway
pass
elif any(map(lambda r: r.search(item.filename), self.files_to_omit)):
continue
else:
# supported files that we want to clean then add
tmp_parser, mtype = parser_factory.get_parser(full_path) # type: ignore
if not tmp_parser:
shutil.rmtree(temp_folder)
os.remove(self.output_filename)
print("%s's format (%s) isn't supported" % (item.filename, mtype))
return False
tmp_parser.remove_all()
os.rename(tmp_parser.output_filename, full_path)
zinfo = zipfile.ZipInfo(item.filename) # type: ignore
clean_zinfo = self._clean_zipinfo(zinfo)
with open(full_path, 'rb') as f:
zout.writestr(clean_zinfo, f.read())
shutil.rmtree(temp_folder)
return True
tree.write(full_path, xml_declaration=True, encoding='utf-8')
return True
class MSOfficeParser(ArchiveBasedAbstractParser):
class MSOfficeParser(ZipParser):
"""
The methods modifying XML documents are usually doing so in two loops:
1. finding the tag/attributes to remove;
2. actually editing the document
since it's tricky to modify the XML while iterating on it.
"""
mimetypes = {
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'application/vnd.openxmlformats-officedocument.presentationml.presentation'
}
files_to_keep = {
'[Content_Types].xml',
'_rels/.rels',
'word/_rels/document.xml.rels',
'word/document.xml',
'word/fontTable.xml',
'word/settings.xml',
'word/styles.xml',
content_types_to_keep = {
'application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml', # /word/endnotes.xml
'application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml', # /word/footnotes.xml
'application/vnd.openxmlformats-officedocument.extended-properties+xml', # /docProps/app.xml
'application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml', # /word/document.xml
'application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml', # /word/fontTable.xml
'application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml', # /word/footer.xml
'application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml', # /word/header.xml
'application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml', # /word/styles.xml
'application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml', # /word/numbering.xml (used for bullet point formatting)
'application/vnd.openxmlformats-officedocument.theme+xml', # /word/theme/theme[0-9].xml (used for font and background coloring, etc.)
'application/vnd.openxmlformats-package.core-properties+xml', # /docProps/core.xml
# for more complicated powerpoints
'application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml',
'application/vnd.openxmlformats-officedocument.presentationml.notesMaster+xml',
'application/vnd.openxmlformats-officedocument.presentationml.handoutMaster+xml',
'application/vnd.openxmlformats-officedocument.drawingml.diagramData+xml',
'application/vnd.openxmlformats-officedocument.drawingml.diagramLayout+xml',
'application/vnd.openxmlformats-officedocument.drawingml.diagramStyle+xml',
'application/vnd.openxmlformats-officedocument.drawingml.diagramColors+xml',
'application/vnd.ms-office.drawingml.diagramDrawing+xml',
# Do we want to keep the following ones?
'application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml',
}
files_to_omit = set(map(re.compile, { # type: ignore
'^docProps/',
}))
def __remove_revisions(self, full_path: str) -> bool:
""" In this function, we're changing the XML
document in two times, since we don't want
to change the tree we're iterating on."""
tree, ns = _parse_xml(full_path)
def __init__(self, filename):
super().__init__(filename)
# No revisions are present
if tree.find('.//w:del', ns) is None:
return True
elif tree.find('.//w:ins', ns) is None:
# MSOffice documents are using various counters for cross-references,
# we collect them all, to make sure that they're effectively counters,
# and not unique id used for fingerprinting.
self.__counters = {
'cNvPr': set(),
'rid': set(),
}
self.files_to_keep = set(map(re.compile, { # type: ignore
r'^\[Content_Types\]\.xml$',
r'^_rels/\.rels$',
r'^xl/sharedStrings\.xml$', # https://docs.microsoft.com/en-us/office/open-xml/working-with-the-shared-string-table
r'^xl/calcChain\.xml$',
r'^(?:word|ppt|xl)/_rels/(document|workbook|presentation)\.xml\.rels$',
r'^(?:word|ppt|xl)/_rels/footer[0-9]*\.xml\.rels$',
r'^(?:word|ppt|xl)/_rels/header[0-9]*\.xml\.rels$',
r'^(?:word|ppt|xl)/charts/_rels/chart[0-9]+\.xml\.rels$',
r'^(?:word|ppt|xl)/charts/colors[0-9]+\.xml$',
r'^(?:word|ppt|xl)/charts/style[0-9]+\.xml$',
r'^(?:word|ppt|xl)/drawings/_rels/drawing[0-9]+\.xml\.rels$',
r'^(?:word|ppt|xl)/styles\.xml$',
# TODO: randomize axId ( https://docs.microsoft.com/en-us/openspecs/office_standards/ms-oi29500/089f849f-fcd6-4fa0-a281-35aa6a432a16 )
r'^(?:word|ppt|xl)/charts/chart[0-9]*\.xml$',
r'^xl/workbook\.xml$',
r'^xl/worksheets/sheet[0-9]+\.xml$',
r'^ppt/slideLayouts/_rels/slideLayout[0-9]+\.xml\.rels$',
r'^ppt/slideLayouts/slideLayout[0-9]+\.xml$',
r'^(?:word|ppt|xl)/tableStyles\.xml$',
r'^(?:word|ppt|xl)/tables/table[0-9]+\.xml$',
r'^ppt/slides/_rels/slide[0-9]*\.xml\.rels$',
r'^ppt/slides/slide[0-9]*\.xml$',
# https://msdn.microsoft.com/en-us/library/dd908153(v=office.12).aspx
r'^(?:word|ppt|xl)/stylesWithEffects\.xml$',
r'^ppt/presentation\.xml$',
# TODO: check if p:bgRef can be randomized
r'^ppt/slideMasters/slideMaster[0-9]+\.xml',
r'^ppt/slideMasters/_rels/slideMaster[0-9]+\.xml\.rels',
r'^xl/worksheets/_rels/sheet[0-9]+\.xml\.rels',
r'^(?:word|ppt|xl)/drawings/vmlDrawing[0-9]+\.vml',
r'^(?:word|ppt|xl)/drawings/drawing[0-9]+\.xml',
r'^(?:word|ppt|xl)/embeddings/Microsoft_Excel_Worksheet[0-9]+\.xlsx',
# rels for complicated powerpoints
r'^ppt/notesSlides/_rels/notesSlide[0-9]+\.xml\.rels',
r'^ppt/notesMasters/_rels/notesMaster[0-9]+\.xml\.rels',
r'^ppt/handoutMasters/_rels/handoutMaster[0-9]+\.xml\.rels',
}))
self.files_to_omit = set(map(re.compile, { # type: ignore
r'^\[trash\]/',
r'^customXml/',
r'webSettings\.xml$',
r'^docProps/custom\.xml$',
r'^(?:word|ppt|xl)/printerSettings/',
r'^(?:word|ppt|xl)/theme',
r'^(?:word|ppt|xl)/people\.xml$',
r'^(?:word|ppt|xl)/persons/person\.xml$',
r'^(?:word|ppt|xl)/numbering\.xml$',
r'^(?:word|ppt|xl)/tags/',
r'^(?:word|ppt|xl)/glossary/',
# View properties like view mode, last viewed slide etc
r'^(?:word|ppt|xl)/viewProps\.xml$',
# Additional presentation-wide properties like printing properties,
# presentation show properties etc.
r'^(?:word|ppt|xl)/presProps\.xml$',
r'^(?:word|ppt|xl)/comments[0-9]*\.xml$',
r'^(?:word|ppt|xl)/threadedComments/threadedComment[0-9]*\.xml$',
r'^(?:word|ppt|xl)/commentsExtended\.xml$',
r'^(?:word|ppt|xl)/commentsExtensible\.xml$',
r'^(?:word|ppt|xl)/commentsIds\.xml$',
# we have an allowlist in self.files_to_keep,
# so we can trash everything else
r'^(?:word|ppt|xl)/_rels/',
r'docMetadata/LabelInfo\.xml$'
}))
if self.__fill_files_to_keep_via_content_types() is False:
raise ValueError
def __fill_files_to_keep_via_content_types(self) -> bool:
    """ Extend `self.files_to_keep` from the archive's `[Content_Types].xml`.

    That file lists, for the parts of the archive, their content type.
    Every part whose type is in `self.content_types_to_keep` gets an
    exact-match pattern added to `self.files_to_keep`.

    Returns False when `[Content_Types].xml` is missing from the archive
    or can't be parsed, True otherwise.
    """
    manifest_name = '[Content_Types].xml'
    with zipfile.ZipFile(self.filename) as archive:
        if manifest_name not in archive.namelist():
            return False
        manifest = archive.read(manifest_name)

    self.content_types: Dict[str, str] = dict()

    try:
        root = ET.fromstring(manifest)
    except ET.ParseError:
        return False

    for entry in root:
        part_name = entry.attrib.get('PartName')
        content_type = entry.attrib.get('ContentType')
        if part_name is None or content_type is None:  # pragma: no cover
            continue
        if content_type not in self.content_types_to_keep:
            continue
        # `PartName` starts with a `/`, while the names in the
        # archive don't, hence the [1:].
        pattern = re.compile('^' + re.escape(part_name[1:]) + '$')
        self.files_to_keep.add(pattern)  # type: ignore
    return True
@staticmethod
def __remove_rsid(full_path: str) -> bool:
""" The method will remove "revision session ID". We're using '}rsid'
instead of proper parsing, since rsid can have multiple forms, like
`rsidRDefault`, `rsidR`, `rsids`, …
For more details, see
- https://msdn.microsoft.com/en-us/library/office/documentformat.openxml.wordprocessing.previoussectionproperties.rsidrpr.aspx
- https://blogs.msdn.microsoft.com/brian_jones/2006/12/11/whats-up-with-all-those-rsids/
"""
try:
tree, namespace = _parse_xml(full_path)
except ET.ParseError as e: # pragma: no cover
logging.error("Unable to parse %s: %s", full_path, e)
return False
# rsid, tags or attributes, are always under the `w` namespace
if 'w' not in namespace:
return True
parent_map = {c:p for p in tree.iter() for c in p}
elements = list([element for element in tree.iterfind('.//w:del', ns)])
for element in elements:
elements_to_remove = list()
for item in tree.iterfind('.//', namespace):
if '}rsid' in item.tag.strip().lower(): # rsid as tag
elements_to_remove.append(item)
continue
for key in list(item.attrib.keys()): # rsid as attribute
if '}rsid' in key.lower():
del item.attrib[key]
for element in elements_to_remove:
parent_map[element].remove(element)
elements = list()
for element in tree.iterfind('.//w:ins', ns):
for position, item in enumerate(tree.iter()):
tree.write(full_path, xml_declaration=True, encoding='utf-8')
return True
@staticmethod
def __remove_nsid(full_path: str) -> bool:
    """ Strip every `w:nsid` element from the XML file at `full_path`.

    nsid are random identifiers that can be used to ease the merging of
    some components of a document. They can also be used for
    fingerprinting.

    See the spec for more details: https://docs.microsoft.com/en-us/dotnet/api/documentformat.openxml.wordprocessing.nsid?view=openxml-2.8.1

    Returns False if the file can't be parsed, True otherwise. The file
    is rewritten in place, unless it doesn't declare the `w` namespace.
    """
    try:
        tree, namespace = _parse_xml(full_path)
    except ET.ParseError as e:  # pragma: no cover
        logging.error("Unable to parse %s: %s", full_path, e)
        return False

    if 'w' not in namespace:
        # nsid tags only exist under the `w` namespace: nothing to do
        return True

    # etree elements don't know their parent, so build a child->parent map
    parent_of = {child: parent for parent in tree.iter() for child in parent}

    # Collect first, remove afterwards: the tree must not be modified
    # while it is being iterated on.
    for nsid in list(tree.iterfind('.//w:nsid', namespace)):
        parent_of[nsid].remove(nsid)

    tree.write(full_path, xml_declaration=True, encoding='utf-8')
    return True
@staticmethod
def __remove_revisions(full_path: str) -> bool:
try:
tree, namespace = _parse_xml(full_path)
except ET.ParseError as e: # pragma: no cover
logging.error("Unable to parse %s: %s", full_path, e)
return False
# Revisions are either deletions (`w:del`) or
# insertions (`w:ins`)
del_presence = tree.find('.//w:del', namespace)
ins_presence = tree.find('.//w:ins', namespace)
if del_presence is None and ins_presence is None:
return True # No revisions are present
parent_map = {c:p for p in tree.iter() for c in p}
elements_del = list()
for element in tree.iterfind('.//w:del', namespace):
elements_del.append(element)
for element in elements_del:
parent_map[element].remove(element)
elements_ins = list()
for element in tree.iterfind('.//w:ins', namespace):
for position, item in enumerate(tree.iter()): # pragma: no cover
if item == element:
for children in element.iterfind('./*'):
elements.append((element, position, children))
elements_ins.append((element, position, children))
break
for (element, position, children) in elements:
for (element, position, children) in elements_ins:
parent_map[element].insert(position, children)
# the list can sometimes contain duplicate elements, so don't remove
# until all children have been processed
for (element, position, children) in elements_ins:
if element in parent_map[element]:
parent_map[element].remove(element)
tree.write(full_path, xml_declaration=True, encoding='utf-8')
return True
@staticmethod
def __remove_document_comment_meta(full_path: str) -> bool:
    """ Remove the comment anchors from the XML file at `full_path`.

    The `w:commentRangeStart`, `w:commentRangeEnd` and
    `w:commentReference` elements are removed from the tree, and the
    file is rewritten in place, once, as utf-8.

    Returns False if the file can't be parsed, True otherwise. The file
    is left untouched when it has no `w` namespace or no such element.
    """
    try:
        tree, namespace = _parse_xml(full_path)
    except ET.ParseError as e:  # pragma: no cover
        logging.error("Unable to parse %s: %s", full_path, e)
        return False

    # The tags we're looking for are under the `w` namespace; searching
    # for a prefix missing from the map makes ElementTree raise.
    if 'w' not in namespace:
        return True

    comment_tags = (
        './/w:commentRangeStart',
        './/w:commentRangeEnd',
        './/w:commentReference',
    )

    # Collect first, remove afterwards: the tree must not be modified
    # while it is being iterated on.
    elements_del = [
        element
        for tag in comment_tags
        for element in tree.iterfind(tag, namespace)
    ]
    if not elements_del:
        return True  # No comment meta tags are present

    parent_map = {c: p for p in tree.iter() for c in p}
    for element in elements_del:
        parent_map[element].remove(element)

    tree.write(full_path, xml_declaration=True, encoding='utf-8')
    return True
def __remove_document_xml_rels_members(self, full_path: str) -> bool:
    """ Drop the relationships pointing to files that are omitted.

    This is meant for the word/_rels/document.xml.rels file: MS Office
    doesn't like dangling references, so every `Relationship` whose
    target matches `self.files_to_omit` without matching
    `self.files_to_keep` is removed, and the file is rewritten in place.

    Returns False if the file can't be parsed, True otherwise.
    """
    try:
        tree, namespace = _parse_xml(full_path)
    except ET.ParseError as e:  # pragma: no cover
        logging.error("Unable to parse %s: %s", full_path, e)
        return False

    if len(namespace.items()) != 1:  # pragma: no cover
        logging.debug("Got several namespaces for Types: %s", namespace.items())

    # Names of the archive members that are omitted and not allowlisted
    dropped = set()
    with zipfile.ZipFile(self.filename) as archive:
        for member in archive.namelist():
            if not any(omit.search(member) for omit in self.files_to_omit):
                continue
            if any(keep.search(member) for keep in self.files_to_keep):
                continue  # the file is in the allowlist
            dropped.add(member)

    root = tree.getroot()
    for relationship in root.findall('{%s}Relationship' % namespace['']):
        # add the word/ prefix to the path, since all document rels
        # are in the word/ directory
        target = 'word/' + relationship.attrib['Target']
        if target in dropped:
            root.remove(relationship)

    tree.write(full_path, xml_declaration=True, encoding='utf-8')
    return True
def __remove_content_type_members(self, full_path: str) -> bool:
    """ Drop the content-type overrides of files that are omitted.

    This is meant for the [Content_Types].xml file: MS Office doesn't
    like dangling references, so every `Override` whose part matches
    `self.files_to_omit` without matching `self.files_to_keep` is
    removed, and the file is rewritten in place.

    Returns False if the file can't be parsed, True otherwise.
    """
    try:
        tree, namespace = _parse_xml(full_path)
    except ET.ParseError as e:  # pragma: no cover
        logging.error("Unable to parse %s: %s", full_path, e)
        return False

    if len(namespace.items()) != 1:  # pragma: no cover
        logging.debug("Got several namespaces for Types: %s", namespace.items())

    # Names of the archive members that are omitted and not allowlisted
    dropped = set()
    with zipfile.ZipFile(self.filename) as archive:
        for member in archive.namelist():
            if not any(omit.search(member) for omit in self.files_to_omit):
                continue
            if any(keep.search(member) for keep in self.files_to_keep):
                continue  # the file is in the allowlist
            dropped.add(member)

    root = tree.getroot()
    for override in root.findall('{%s}Override' % namespace['']):
        part = override.attrib['PartName'][1:]  # remove the leading '/'
        if part in dropped:
            root.remove(override)

    tree.write(full_path, xml_declaration=True, encoding='utf-8')
    return True
def _final_checks(self) -> bool:
    """ Warn when one of the collected counters looks inconsistent.

    A non-empty set of counters whose size differs from its largest
    value can't be the gapless sequence 1..max. This is only logged for
    now, and only for the first such counter: the method always
    returns True.
    """
    for name, values in self.__counters.items():
        if not values or len(values) == max(values):
            continue
        # TODO: make this an error and return False
        # once the ability to correct the counters is implemented
        logging.warning("%s contains invalid %s: %s", self.filename, name, values)
        return True
    return True
def __collect_counters(self, full_path: str):
    """ Record the numeric ids found in the file at `full_path`.

    The file is read as utf-8 text, and the numbers found in it are
    added to the matching set of `self.__counters`.
    """
    with open(full_path, encoding='utf-8') as f:
        data = f.read()

    # "relationship Id"
    relationship_ids = re.findall(r'(?:\s|r:)[iI][dD]="rId([0-9]+)"(?:\s|/)', data)
    self.__counters['rid'].update(int(number) for number in relationship_ids)

    # "connector for Non-visual property"
    non_visual_ids = re.findall(r'<p:cNvPr id="([0-9]+)"', data)
    self.__counters['cNvPr'].update(int(number) for number in non_visual_ids)
@staticmethod
def __randomize_creationId(full_path: str) -> bool:
    """ Replace the value of every `p14:creationId` with a random one.

    The file at `full_path` is rewritten in place, unless it doesn't
    declare the `p14` namespace.

    Returns False if the file can't be parsed, True otherwise.
    """
    try:
        tree, namespace = _parse_xml(full_path)
    except ET.ParseError as e:  # pragma: no cover
        logging.error("Unable to parse %s: %s", full_path, e)
        return False

    if 'p14' not in namespace:
        return True  # pragma: no cover

    for item in tree.iterfind('.//p14:creationId', namespace):
        # random.randint includes both bounds, so the upper one has to be
        # 2**32 - 1 for the value to fit in an unsigned 32-bit integer.
        item.set('val', '%s' % random.randint(0, 2**32 - 1))

    tree.write(full_path, xml_declaration=True, encoding='utf-8')
    return True
@staticmethod
def __randomize_sldMasterId(full_path: str) -> bool:
    """ Replace the id of every `p:sldMasterId` with a random one.

    The file at `full_path` is rewritten in place, unless it doesn't
    declare the `p` namespace.

    Returns False if the file can't be parsed, True otherwise.
    """
    try:
        tree, namespace = _parse_xml(full_path)
    except ET.ParseError as e:  # pragma: no cover
        logging.error("Unable to parse %s: %s", full_path, e)
        return False

    if 'p' not in namespace:
        return True  # pragma: no cover

    for item in tree.iterfind('.//p:sldMasterId', namespace):
        # random.randint includes both bounds, so the upper one has to be
        # 2**32 - 1 for the value to fit in an unsigned 32-bit integer.
        # NOTE(review): the PresentationML schema might also require
        # slide master ids to be at least 2**31 -- confirm.
        item.set('id', '%s' % random.randint(0, 2**32 - 1))

    tree.write(full_path, xml_declaration=True, encoding='utf-8')
    return True
def _specific_cleanup(self, full_path: str) -> bool:
if full_path.endswith('/word/document.xml'):
return self.__remove_revisions(full_path)
# pylint: disable=too-many-return-statements,too-many-branches
if os.stat(full_path).st_size == 0: # Don't process empty files
return True
if not full_path.endswith(('.xml', '.xml.rels')):
return True
if self.__randomize_creationId(full_path) is False:
return False
self.__collect_counters(full_path)
if full_path.endswith('/[Content_Types].xml'):
# this file contains references to files that we might
# remove, and MS Office doesn't like dangling references
if self.__remove_content_type_members(full_path) is False: # pragma: no cover
return False
elif full_path.endswith('/word/document.xml'):
# this file contains the revisions
if self.__remove_revisions(full_path) is False:
return False # pragma: no cover
# remove comment references and ranges
if self.__remove_document_comment_meta(full_path) is False:
return False # pragma: no cover
elif full_path.endswith('/word/_rels/document.xml.rels'):
# similar to the above, but for the document.xml.rels file
if self.__remove_document_xml_rels_members(full_path) is False: # pragma: no cover
return False
elif full_path.endswith('/docProps/app.xml'):
# This file must be present and valid,
# so we're removing as much as we can.
with open(full_path, 'wb') as f:
f.write(b'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>')
f.write(b'<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties">')
f.write(b'</Properties>')
elif full_path.endswith('/docProps/core.xml'):
# This file must be present and valid,
# so we're removing as much as we can.
with open(full_path, 'wb') as f:
f.write(b'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>')
f.write(b'<cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties">')
f.write(b'</cp:coreProperties>')
elif full_path.endswith('/ppt/tableStyles.xml'): # pragma: no cover
# This file must be present and valid,
# so we're removing as much as we can.
with open(full_path, 'wb') as f:
f.write(b'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>')
uid = str(uuid.uuid4()).encode('utf-8')
f.write(b'<a:tblStyleLst def="{%s}" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"/>' % uid)
elif full_path.endswith('ppt/presentation.xml'):
if self.__randomize_sldMasterId(full_path) is False:
return False # pragma: no cover
if self.__remove_rsid(full_path) is False:
return False # pragma: no cover
if self.__remove_nsid(full_path) is False:
return False # pragma: no cover
try:
_sort_xml_attributes(full_path)
except ET.ParseError as e: # pragma: no cover
logging.error("Unable to parse %s: %s", full_path, e)
return False
# This is awful, I'm sorry.
#
# Microsoft Office isn't happy when we have the `mc:Ignorable`
# tag containing namespaces that aren't present in the xml file,
# so instead of trying to remove this specific tag with etree,
# we're removing it, with a regexp.
#
# Since we're the ones producing this file, via the call to
# _sort_xml_attributes, there won't be any "funny tricks".
# Worst case, the tag isn't present, and everything is fine.
#
# see: https://docs.microsoft.com/en-us/dotnet/framework/wpf/advanced/mc-ignorable-attribute
with open(full_path, 'rb') as f:
text = f.read()
out = re.sub(b'mc:Ignorable="[^"]*"', b'', text, count=1)
with open(full_path, 'wb') as f:
f.write(out)
return True
def get_meta(self) -> Dict[str, str]:
def _specific_get_meta(self, full_path: str, file_path: str) -> Dict[str, Any]:
"""
Yes, I know that parsing xml with regexp ain't pretty,
be my guest and fix it if you want.
"""
metadata = {}
zipin = zipfile.ZipFile(self.filename)
for item in zipin.infolist():
if item.filename.startswith('docProps/') and item.filename.endswith('.xml'):
content = zipin.read(item).decode('utf-8')
try:
results = re.findall(r"<(.+)>(.+)</\1>", content, re.I|re.M)
for (key, value) in results:
metadata[key] = value
except TypeError: # We didn't manage to parse the xml file
pass
if not metadata: # better safe than sorry
metadata[item] = 'harmful content'
for key, value in self._get_zipinfo_meta(item).items():
metadata[key] = value
zipin.close()
return metadata
if not file_path.startswith('docProps/') or not file_path.endswith('.xml'):
return {}
with open(full_path, encoding='utf-8') as f:
try:
results = re.findall(r"<(.+)>(.+)</\1>", f.read(), re.I | re.M)
return {k: v for (k, v) in results}
except (TypeError, UnicodeDecodeError):
# We didn't manage to parse the xml file
return {file_path: 'harmful content', }
class LibreOfficeParser(ArchiveBasedAbstractParser):
class LibreOfficeParser(ZipParser):
mimetypes = {
'application/vnd.oasis.opendocument.text',
'application/vnd.oasis.opendocument.spreadsheet',
@@ -213,59 +558,70 @@ class LibreOfficeParser(ArchiveBasedAbstractParser):
'application/vnd.oasis.opendocument.formula',
'application/vnd.oasis.opendocument.image',
}
files_to_keep = {
'META-INF/manifest.xml',
'content.xml',
'manifest.rdf',
'mimetype',
'settings.xml',
'styles.xml',
}
files_to_omit = set(map(re.compile, { # type: ignore
r'^meta\.xml$',
'^Configurations2/',
'^Thumbnails/',
}))
def __init__(self, filename):
super().__init__(filename)
def __remove_revisions(self, full_path: str) -> bool:
tree, ns = _parse_xml(full_path)
self.files_to_keep = set(map(re.compile, { # type: ignore
r'^META-INF/manifest\.xml$',
r'^content\.xml$',
r'^manifest\.rdf$',
r'^mimetype$',
r'^settings\.xml$',
r'^styles\.xml$',
}))
self.files_to_omit = set(map(re.compile, { # type: ignore
r'^meta\.xml$',
r'^layout-cache$',
r'^Configurations2/',
r'^Thumbnails/',
}))
if 'office' not in ns.keys(): # no revisions in the current file
@staticmethod
def __remove_revisions(full_path: str) -> bool:
try:
tree, namespace = _parse_xml(full_path)
except ET.ParseError as e:
logging.error("Unable to parse %s: %s", full_path, e)
return False
if 'office' not in namespace: # no revisions in the current file
return True
for text in tree.getroot().iterfind('.//office:text', ns):
for changes in text.iterfind('.//text:tracked-changes', ns):
for text in tree.getroot().iterfind('.//office:text', namespace):
for changes in text.iterfind('.//text:tracked-changes', namespace):
text.remove(changes)
tree.write(full_path, xml_declaration=True)
tree.write(full_path, xml_declaration=True, encoding='utf-8')
return True
def _specific_cleanup(self, full_path: str) -> bool:
if os.path.basename(full_path) == 'content.xml':
return self.__remove_revisions(full_path)
if os.stat(full_path).st_size == 0: # Don't process empty files
return True
if os.path.basename(full_path).endswith('.xml'):
if os.path.basename(full_path) == 'content.xml':
if self.__remove_revisions(full_path) is False:
return False
try:
_sort_xml_attributes(full_path)
except ET.ParseError as e:
logging.error("Unable to parse %s: %s", full_path, e)
return False
return True
def get_meta(self) -> Dict[str, str]:
def _specific_get_meta(self, full_path: str, file_path: str) -> Dict[str, Any]:
"""
Yes, I know that parsing xml with regexp ain't pretty,
be my guest and fix it if you want.
"""
metadata = {}
zipin = zipfile.ZipFile(self.filename)
for item in zipin.infolist():
if item.filename == 'meta.xml':
content = zipin.read(item).decode('utf-8')
try:
results = re.findall(r"<((?:meta|dc|cp).+?)>(.+)</\1>", content, re.I|re.M)
for (key, value) in results:
metadata[key] = value
except TypeError: # We didn't manage to parse the xml file
pass
if not metadata: # better safe than sorry
metadata[item] = 'harmful content'
for key, value in self._get_zipinfo_meta(item).items():
metadata[key] = value
zipin.close()
return metadata
if file_path != 'meta.xml':
return {}
with open(full_path, encoding='utf-8') as f:
try:
results = re.findall(r"<((?:meta|dc|cp).+?)[^>]*>(.+)</\1>", f.read(), re.I|re.M)
return {k:v for (k, v) in results}
except (TypeError, UnicodeDecodeError): # We didn't manage to parse the xml file
# We didn't manage to parse the xml file
return {file_path: 'harmful content', }

View File

@@ -2,26 +2,38 @@ import glob
import os
import mimetypes
import importlib
from typing import TypeVar, List, Tuple, Optional
from typing import TypeVar, Optional, List, Tuple
from . import abstract, unsupported_extensions
assert Tuple # make pyflakes happy
from . import abstract, UNSUPPORTED_EXTENSIONS
T = TypeVar('T', bound='abstract.AbstractParser')
mimetypes.add_type('application/epub+zip', '.epub')
mimetypes.add_type('application/x-dtbncx+xml', '.ncx') # EPUB Navigation Control XML File
# This should be removed after we move to python3.10
# https://github.com/python/cpython/commit/20a5b7e986377bdfd929d7e8c4e3db5847dfdb2d
mimetypes.add_type('image/heic', '.heic')
def __load_all_parsers():
""" Loads every parser in a dynamic way """
current_dir = os.path.dirname(__file__)
for name in glob.glob(os.path.join(current_dir, '*.py')):
if name.endswith('abstract.py') or name.endswith('__init__.py'):
for fname in glob.glob(os.path.join(current_dir, '*.py')):
if fname.endswith('abstract.py'):
continue
basename = os.path.basename(name)
elif fname.endswith('__init__.py'):
continue
elif fname.endswith('exiftool.py'):
continue
basename = os.path.basename(fname)
name, _ = os.path.splitext(basename)
importlib.import_module('.' + name, package='libmat2')
__load_all_parsers()
def _get_parsers() -> List[T]:
""" Get all our parsers!"""
def __get_parsers(cls):
@@ -31,16 +43,22 @@ def _get_parsers() -> List[T]:
def get_parser(filename: str) -> Tuple[Optional[T], Optional[str]]:
""" Return the appropriate parser for a given filename.
:raises ValueError: Raised if the instantiation of the parser went wrong.
"""
mtype, _ = mimetypes.guess_type(filename)
_, extension = os.path.splitext(filename)
if extension in unsupported_extensions:
if extension.lower() in UNSUPPORTED_EXTENSIONS:
return None, mtype
if mtype == 'application/x-tar':
if extension[1:] in ('bz2', 'gz', 'xz'):
mtype = mtype + '+' + extension[1:]
for parser_class in _get_parsers(): # type: ignore
if mtype in parser_class.mimetypes:
try:
return parser_class(filename), mtype
except ValueError:
return None, mtype
# This instantiation might raise a ValueError on malformed files
return parser_class(filename), mtype
return None, mtype

View File

@@ -7,7 +7,7 @@ import re
import logging
import tempfile
import io
from distutils.version import LooseVersion
from typing import Union, Dict
import cairo
import gi
@@ -16,12 +16,7 @@ from gi.repository import Poppler, GLib
from . import abstract
logging.basicConfig(level=logging.DEBUG)
poppler_version = Poppler.get_version()
if LooseVersion(poppler_version) < LooseVersion('0.46'):
raise ValueError("MAT2 needs at least Poppler version 0.46 to work. \
The installed version is %s." % poppler_version)
FIXED_PDF_VERSION = cairo.PDFVersion.VERSION_1_5
class PDFParser(abstract.AbstractParser):
@@ -33,13 +28,21 @@ class PDFParser(abstract.AbstractParser):
def __init__(self, filename):
super().__init__(filename)
self.uri = 'file://' + os.path.abspath(self.filename)
self.__scale = 2 # how much precision do we want for the render
self.__scale = 200 / 72.0 # how much precision do we want for the render
try: # Check now that the file is valid, to avoid surprises later
Poppler.Document.new_from_file(self.uri, None)
except GLib.GError: # Invalid PDF
raise ValueError
def remove_all_lightweight(self):
def remove_all(self) -> bool:
if self.lightweight_cleaning is True:
try:
return self.__remove_all_lightweight()
except (cairo.Error, MemoryError) as e:
raise RuntimeError(e)
return self.__remove_all_thorough()
def __remove_all_lightweight(self) -> bool:
"""
Load the document into Poppler, render pages on a new PDFSurface.
"""
@@ -47,7 +50,8 @@ class PDFParser(abstract.AbstractParser):
pages_count = document.get_n_pages()
tmp_path = tempfile.mkstemp()[1]
pdf_surface = cairo.PDFSurface(tmp_path, 10, 10)
pdf_surface = cairo.PDFSurface(tmp_path, 10, 10) # resized later anyway
pdf_surface.restrict_to_version(FIXED_PDF_VERSION)
pdf_context = cairo.Context(pdf_surface) # context draws on the surface
for pagenum in range(pages_count):
@@ -66,7 +70,7 @@ class PDFParser(abstract.AbstractParser):
return True
def remove_all(self):
def __remove_all_thorough(self) -> bool:
"""
Load the document into Poppler, render pages on PNG,
and shove those PNG into a new PDF.
@@ -76,15 +80,19 @@ class PDFParser(abstract.AbstractParser):
_, tmp_path = tempfile.mkstemp()
pdf_surface = cairo.PDFSurface(tmp_path, 32, 32) # resized later anyway
pdf_surface.restrict_to_version(FIXED_PDF_VERSION)
pdf_context = cairo.Context(pdf_surface)
for pagenum in range(pages_count):
page = document.get_page(pagenum)
if page is None: # pragma: no cover
logging.error("Unable to get PDF pages")
return False
page_width, page_height = page.get_size()
logging.info("Rendering page %d/%d", pagenum + 1, pages_count)
width = int(page_width) * self.__scale
height = int(page_height) * self.__scale
width = int(page_width * self.__scale)
height = int(page_height * self.__scale)
img_surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, width, height)
img_context = cairo.Context(img_surface)
@@ -98,10 +106,14 @@ class PDFParser(abstract.AbstractParser):
buf.seek(0)
img = cairo.ImageSurface.create_from_png(buf)
pdf_surface.set_size(page_width*self.__scale, page_height*self.__scale)
if cairo.version_info < (1, 12, 0):
pdf_surface.set_size(width, height)
else:
pdf_surface.set_size(page_width, page_height)
pdf_surface.set_device_scale(1 / self.__scale, 1 / self.__scale)
pdf_context.set_source_surface(img, 0, 0)
pdf_context.paint()
pdf_context.show_page()
pdf_context.show_page() # draw pdf_context on pdf_surface
pdf_surface.finish()
@@ -118,17 +130,27 @@ class PDFParser(abstract.AbstractParser):
document.set_creator('')
document.set_creation_date(-1)
document.save('file://' + os.path.abspath(out_file))
# Cairo adds "/Producer" and "/CreationDate", and Poppler sometimes
# fails to remove them, we have to use this terrible regex.
# It should(tm) be alright though, because cairo's output format
# for metadata is fixed.
with open(out_file, 'rb') as f:
out = re.sub(rb'<<[\s\n]*/Producer.*?>>', b' << >>', f.read(),
count=0, flags=re.DOTALL | re.IGNORECASE)
with open(out_file, 'wb') as f:
f.write(out)
return True
@staticmethod
def __parse_metadata_field(data: str) -> dict:
def __parse_metadata_field(data: str) -> Dict[str, str]:
metadata = {}
for (_, key, value) in re.findall(r"<(xmp|pdfx|pdf|xmpMM):(.+)>(.+)</\1:\2>", data, re.I):
metadata[key] = value
return metadata
def get_meta(self):
def get_meta(self) -> Dict[str, Union[str, Dict]]:
""" Return a dict with all the meta of the file
"""
metadata = {}

View File

@@ -1,43 +1,42 @@
import logging
from typing import Union, Tuple, Dict
from typing import Union, Dict, List, Tuple
from . import abstract
class TorrentParser(abstract.AbstractParser):
mimetypes = {'application/x-bittorrent', }
whitelist = {b'announce', b'announce-list', b'info'}
allowlist = {b'announce', b'announce-list', b'info'}
def get_meta(self) -> Dict[str, str]:
metadata = {}
def __init__(self, filename):
super().__init__(filename)
with open(self.filename, 'rb') as f:
d = _BencodeHandler().bdecode(f.read())
if d is None:
return {'Unknown meta': 'Unable to parse torrent file "%s".' % self.filename}
for k, v in d.items():
if k not in self.whitelist:
metadata[k.decode('utf-8')] = v
return metadata
self.dict_repr = _BencodeHandler().bdecode(f.read())
if self.dict_repr is None:
raise ValueError
def get_meta(self) -> Dict[str, Union[str, Dict]]:
metadata = {}
for key, value in self.dict_repr.items():
if key not in self.allowlist:
metadata[key.decode('utf-8')] = value
return metadata
def remove_all(self) -> bool:
cleaned = dict()
with open(self.filename, 'rb') as f:
d = _BencodeHandler().bdecode(f.read())
if d is None:
return False
for k, v in d.items():
if k in self.whitelist:
cleaned[k] = v
for key, value in self.dict_repr.items():
if key in self.allowlist:
cleaned[key] = value
with open(self.output_filename, 'wb') as f:
f.write(_BencodeHandler().bencode(cleaned))
self.dict_repr = cleaned # since we're stateful
return True
class _BencodeHandler(object):
class _BencodeHandler:
"""
Since bencode isn't that hard to parse,
MAT2 comes with its own parser, based on the spec
mat2 comes with its own parser, based on the spec
https://wiki.theory.org/index.php/BitTorrentSpecification#Bencoding
"""
def __init__(self):
@@ -60,8 +59,6 @@ class _BencodeHandler(object):
def __decode_int(s: bytes) -> Tuple[int, bytes]:
s = s[1:]
next_idx = s.index(b'e')
if next_idx is None:
raise ValueError # missing suffix
if s.startswith(b'-0'):
raise ValueError # negative zero doesn't exist
elif s.startswith(b'0') and next_idx != 1:
@@ -70,32 +67,30 @@ class _BencodeHandler(object):
@staticmethod
def __decode_string(s: bytes) -> Tuple[bytes, bytes]:
sep = s.index(b':')
if set is None:
raise ValueError # missing suffix
str_len = int(s[:sep])
if str_len < 0:
raise ValueError
elif s[0] == b'0' and sep != 1:
colon = s.index(b':')
# FIXME Python3 is broken here, the call to `ord` shouldn't be needed,
# but apparently it is. This is utterly idiotic.
if (s[0] == ord('0') or s[0] == '0') and colon != 1:
raise ValueError
str_len = int(s[:colon])
s = s[1:]
return s[sep:sep+str_len], s[sep+str_len:]
return s[colon:colon+str_len], s[colon+str_len:]
def __decode_list(self, s: bytes) -> Tuple[list, bytes]:
r = list()
def __decode_list(self, s: bytes) -> Tuple[List, bytes]:
ret = list()
s = s[1:] # skip leading `l`
while s[0] != ord('e'):
v, s = self.__decode_func[s[0]](s)
r.append(v)
return r, s[1:]
value, s = self.__decode_func[s[0]](s)
ret.append(value)
return ret, s[1:]
def __decode_dict(self, s: bytes) -> Tuple[dict, bytes]:
r = dict()
def __decode_dict(self, s: bytes) -> Tuple[Dict, bytes]:
ret = dict()
s = s[1:] # skip leading `d`
while s[0] != ord(b'e'):
k, s = self.__decode_string(s)
r[k], s = self.__decode_func[s[0]](s)
return r, s[1:]
key, s = self.__decode_string(s)
ret[key], s = self.__decode_func[s[0]](s)
return ret, s[1:]
@staticmethod
def __encode_int(x: bytes) -> bytes:
@@ -113,21 +108,21 @@ class _BencodeHandler(object):
def __encode_dict(self, x: dict) -> bytes:
ret = b''
for k, v in sorted(x.items()):
ret += self.__encode_func[type(k)](k)
ret += self.__encode_func[type(v)](v)
for key, value in sorted(x.items()):
ret += self.__encode_func[type(key)](key)
ret += self.__encode_func[type(value)](value)
return b'd' + ret + b'e'
def bencode(self, s: Union[dict, list, bytes, int]) -> bytes:
def bencode(self, s: Union[Dict, List, bytes, int]) -> bytes:
return self.__encode_func[type(s)](s)
def bdecode(self, s: bytes) -> Union[dict, None]:
def bdecode(self, s: bytes) -> Union[Dict, None]:
try:
r, l = self.__decode_func[s[0]](s)
ret, trail = self.__decode_func[s[0]](s)
except (IndexError, KeyError, ValueError) as e:
logging.debug("Not a valid bencoded string: %s", e)
logging.warning("Not a valid bencoded string: %s", e)
return None
if l != b'':
logging.debug("Invalid bencoded value (data after valid prefix)")
if trail != b'':
logging.warning("Invalid bencoded value (data after valid prefix)")
return None
return r
return ret

144
libmat2/video.py Normal file
View File

@@ -0,0 +1,144 @@
import subprocess
import functools
import shutil
import logging
from typing import Union, Dict
from . import exiftool
from . import bubblewrap
class AbstractFFmpegParser(exiftool.ExiftoolParser):
    """Shared base class for the FFmpeg-backed parsers (mostly video).

    Cleaning is delegated to the `ffmpeg` executable; listing metadata is
    inherited from the parent class and then filtered.
    """
    # Metadata fields that some file formats make mandatory, mapped to the
    # standard value that `get_meta` deliberately leaves out of its report.
    meta_key_value_allowlist: Dict[str, Union[str, int]] = dict()

    def remove_all(self) -> bool:
        """Ask ffmpeg to write a cleaned copy to `self.output_filename`.

        Returns True on success, and False (after logging an error) when
        running the command raises `subprocess.CalledProcessError`.
        """
        if self.meta_key_value_allowlist:
            logging.warning('The format of "%s" (%s) has some mandatory '
                            'metadata fields; mat2 filled them with standard '
                            'data.', self.filename, ', '.join(self.mimetypes))

        ffmpeg_args = [
            _get_ffmpeg_path(),
            '-i', self.filename,       # the file to read
            '-y',                      # overwrite the output file if it exists
            '-map', '0',               # copy all the streams from input to output
            '-codec', 'copy',          # no re-encoding, plain copy (speed!)
            '-loglevel', 'panic',      # keep ffmpeg quiet
            '-hide_banner',            # no banner either
            '-map_metadata', '-1',     # remove superficial metadata
            '-map_chapters', '-1',     # remove chapters
            '-disposition', '0',       # remove dispositions (see ffmpeg's manpage)
            '-fflags', '+bitexact',    # don't add any metadata
            '-flags:v', '+bitexact',   # don't add any metadata
            '-flags:a', '+bitexact',   # don't add any metadata
            self.output_filename,
        ]

        try:
            if not self.sandbox:
                subprocess.run(ffmpeg_args, check=True)
            else:
                bubblewrap.run(ffmpeg_args, check=True,
                               input_filename=self.filename,
                               output_filename=self.output_filename)
        except subprocess.CalledProcessError as err:
            logging.error("Something went wrong during the processing of %s: %s", self.filename, err)
            return False
        return True

    def get_meta(self) -> Dict[str, Union[str, Dict]]:
        """Return the parent's metadata, minus the mandatory fields that
        already hold their expected standard value."""
        reported = super().get_meta()
        expected = self.meta_key_value_allowlist
        return {
            name: content
            for name, content in reported.items()
            if not (name in expected and content == expected[name])
        }
class WMVParser(AbstractFFmpegParser):
    """FFmpeg-backed parser for `video/x-ms-wmv` files."""
    mimetypes = {'video/x-ms-wmv'}

    # NOTE(review): consumed by the exiftool-based parent class, presumably
    # as the set of fields that aren't reported as metadata; confirm there.
    meta_allowlist = {
        'AudioChannels',
        'AudioCodecID',
        'AudioCodecName',
        'AudioSampleRate',
        'DataPackets',
        'Directory',
        'Duration',
        'ErrorCorrectionType',
        'ExifToolVersion',
        'FileAccessDate',
        'FileInodeChangeDate',
        'FileLength',
        'FileModifyDate',
        'FileName',
        'FilePermissions',
        'FileSize',
        'FileType',
        'FileTypeExtension',
        'FrameCount',
        'FrameRate',
        'ImageHeight',
        'ImageSize',
        'ImageWidth',
        'MIMEType',
        'MaxBitrate',
        'MaxPacketSize',
        'Megapixels',
        'MinPacketSize',
        'Preroll',
        'SendDuration',
        'SourceFile',
        'StreamNumber',
        'VideoCodecName',
    }

    # Some metadata are mandatory in this format :/
    # A field listed here is only ignored when it holds exactly this value.
    meta_key_value_allowlist = {
        'AudioCodecDescription': '',
        'CreationDate': '0000:00:00 00:00:00Z',
        'FileID': '00000000-0000-0000-0000-000000000000',
        'Flags': 2,  # FIXME: What is this? Why 2?
        'ModifyDate': '0000:00:00 00:00:00',
        'TimeOffset': '0 s',
        'VideoCodecDescription': '',
        'StreamType': 'Audio',
    }
class AVIParser(AbstractFFmpegParser):
    """FFmpeg-backed parser for `video/x-msvideo` files."""
    mimetypes = {'video/x-msvideo'}

    # NOTE(review): consumed by the exiftool-based parent class, presumably
    # as the set of fields that aren't reported as metadata; confirm there.
    meta_allowlist = {
        'AlphaMask',
        'AudioCodec',
        'AudioCodecRate',
        'AudioSampleCount',
        'AudioSampleRate',
        'AvgBytesPerSec',
        'BMPVersion',
        'BitDepth',
        'BitsPerSample',
        'BlueMask',
        'ColorSpace',
        'Compression',
        'Directory',
        'Duration',
        'Encoding',
        'ExifToolVersion',
        'FileAccessDate',
        'FileInodeChangeDate',
        'FileModifyDate',
        'FileName',
        'FilePermissions',
        'FileSize',
        'FileType',
        'FileTypeExtension',
        'FrameCount',
        'FrameRate',
        'GreenMask',
        'ImageHeight',
        'ImageLength',
        'ImageSize',
        'ImageWidth',
        'MIMEType',
        'MaxDataRate',
        'Megapixels',
        'NumChannels',
        'NumColors',
        'NumImportantColors',
        'PixelsPerMeterX',
        'PixelsPerMeterY',
        'Planes',
        'Quality',
        'RedMask',
        'SampleRate',
        'SampleSize',
        'SourceFile',
        'StreamCount',
        'StreamType',
        'VideoCodec',
        'VideoFrameCount',
        'VideoFrameRate',
    }
class MP4Parser(AbstractFFmpegParser):
    """FFmpeg-backed parser for `video/mp4` files."""
    mimetypes = {'video/mp4'}

    # NOTE(review): consumed by the exiftool-based parent class, presumably
    # as the set of fields that aren't reported as metadata; confirm there.
    meta_allowlist = {
        'AudioBitsPerSample',
        'AudioChannels',
        'AudioFormat',
        'AudioSampleRate',
        'AvgBitrate',
        'Balance',
        'BitDepth',
        'Directory',
        'Duration',
        'ExifToolVersion',
        'FileAccessDate',
        'FileInodeChangeDate',
        'FileModifyDate',
        'FileName',
        'FilePermissions',
        'FileSize',
        'FileType',
        'FileTypeExtension',
        'ImageHeight',
        'ImageSize',
        'ImageWidth',
        'MIMEType',
        'MatrixStructure',
        'MediaDuration',
        'MediaTimeScale',
        'Megapixels',
        'MovieDataSize',
        'SourceFile',
        'SourceImageHeight',
        'SourceImageWidth',
        'TrackDuration',
        'VideoFrameRate',
        'XResolution',
        'YResolution',
    }

    # Some metadata are mandatory in this format :/
    # A field listed here is only ignored when it holds exactly this value.
    meta_key_value_allowlist = {
        'CreateDate': '0000:00:00 00:00:00',
        'CurrentTime': '0 s',
        'MediaCreateDate': '0000:00:00 00:00:00',
        'MediaLanguageCode': 'und',
        'MediaModifyDate': '0000:00:00 00:00:00',
        'ModifyDate': '0000:00:00 00:00:00',
        'OpColor': '0 0 0',
        'PosterTime': '0 s',
        'PreferredRate': '1',
        'PreferredVolume': '100.00%',
        'PreviewDuration': '0 s',
        'PreviewTime': '0 s',
        'SelectionDuration': '0 s',
        'SelectionTime': '0 s',
        'TrackCreateDate': '0000:00:00 00:00:00',
        'TrackModifyDate': '0000:00:00 00:00:00',
        'TrackVolume': '0.00%',
    }
@functools.lru_cache(maxsize=None)
def _get_ffmpeg_path() -> str:  # pragma: no cover
    """Return the path of the `ffmpeg` executable found via `shutil.which`.

    A successful lookup is memoised by `lru_cache`, so later calls skip the
    search.

    Raises:
        RuntimeError: if no `ffmpeg` executable can be found.
    """
    ffmpeg = shutil.which('ffmpeg')
    if not ffmpeg:
        raise RuntimeError("Unable to find ffmpeg")
    return ffmpeg

192
libmat2/web.py Normal file
View File

@@ -0,0 +1,192 @@
from html import parser, escape
from typing import Any, Optional, Dict, List, Tuple, Set
import re
import string
from . import abstract
# pylint: disable=too-many-instance-attributes
class CSSParser(abstract.AbstractParser):
    """There is no such thing as metadata in CSS files, only comments of
    the form `/* … */`, so the latter are what gets reported and removed."""
    mimetypes = {'text/css', }
    # DOTALL lets a comment span several lines.
    flags = re.MULTILINE | re.DOTALL

    def __read_stylesheet(self) -> str:
        """Return the content of the input file, decoded as UTF-8.

        Raises ValueError if the file isn't valid UTF-8.
        """
        with open(self.filename, encoding='utf-8') as source:
            try:
                return source.read()
            except UnicodeDecodeError:  # pragma: no cover
                raise ValueError

    def remove_all(self) -> bool:
        """Write the stylesheet, stripped of every comment, to
        `self.output_filename`, then return True."""
        stylesheet = self.__read_stylesheet()
        without_comments = re.sub(r'/\*.*?\*/', '', stylesheet, count=0, flags=self.flags)
        with open(self.output_filename, 'w', encoding='utf-8') as target:
            target.write(without_comments)
        return True

    def get_meta(self) -> Dict[str, Any]:
        """Return what the comments of the stylesheet contain.

        A comment line holding exactly one colon is recorded as a
        key/value pair; any other line ends up under 'harmful data', each
        such line overwriting the previous one.
        """
        metadata = {}
        stylesheet = self.__read_stylesheet()
        for comment in re.findall(r'/\*(.*?)\*/', stylesheet, self.flags):
            for line in comment.splitlines():
                fields = line.split(':')
                if len(fields) == 2:
                    name, content = fields
                    metadata[name.strip(string.whitespace + '*')] = content.strip()
                else:
                    metadata['harmful data'] = line.strip()
        return metadata
class AbstractHTMLParser(abstract.AbstractParser):
    """Base class for the HTML/XML-like formats.

    The whole file is parsed at construction time by a `_HTMLParser`, to
    which `get_meta` and `remove_all` then simply delegate.
    """
    # Tags handed to `_HTMLParser` as its blocklist.
    tags_blocklist: Set[str] = set()
    # Some tags are mandatory in some html/xml-based formats: those are
    # kept, but their content is thrown away.
    tags_required_blocklist: Set[str] = set()

    def __init__(self, filename):
        super().__init__(filename)
        self.__parser = _HTMLParser(self.filename, self.tags_blocklist,
                                    self.tags_required_blocklist)
        with open(filename, encoding='utf-8') as document:
            markup = document.read()
        self.__parser.feed(markup)
        self.__parser.close()

    def get_meta(self) -> Dict[str, Any]:
        """Return the metadata gathered while parsing the file."""
        return self.__parser.get_meta()

    def remove_all(self) -> bool:
        """Have the parser write its cleaned output to
        `self.output_filename`."""
        return self.__parser.remove_all(self.output_filename)
class HTMLParser(AbstractHTMLParser):
    """Parser for HTML and XHTML documents."""
    mimetypes = {'application/xhtml+xml', 'text/html'}
    tags_blocklist = {'meta'}
    tags_required_blocklist = {'title'}
class DTBNCXParser(AbstractHTMLParser):
    """Parser for `application/x-dtbncx+xml` files."""
    mimetypes = {'application/x-dtbncx+xml'}
    tags_required_blocklist = {'doctitle', 'meta', 'title'}
class _HTMLParser(parser.HTMLParser):
    """Rebuild an HTML document while dropping the blocklisted tags.

    Python doesn't have a validating html parser in its stdlib, so
    we're using an internal queue to track all the opening/closing tags,
    and hoping for the best.

    Moreover, parser.HTMLParser doesn't provide a get_endtag_text
    method, so we have to use get_starttag_text instead, put its result in a
    LIFO, and transform it into a closing tag when needed.

    Also, gotcha: the `tag` parameters are always in lowercase.

    Args:
        filename: name of the parsed file, only used in error messages.
        blocklisted_tags: tags that are dropped along with their content.
        required_blocklisted_tags: tags that are kept, but emptied of
            their content.

    Raises:
        ValueError: if the two sets of tags overlap, or later on, while
            feeding, if the tags of the document are not properly nested.
    """
    def __init__(self, filename, blocklisted_tags, required_blocklisted_tags):
        super().__init__()
        self.filename = filename
        self.__textrepr = ''  # the cleaned document, built incrementally
        self.__meta: Dict[str, Any] = {}
        # Raw text of the opening tags that are still waiting to be closed
        self.__validation_queue: List[str] = list()

        # We're using counters instead of booleans, to handle nested tags
        self.__in_dangerous_but_required_tag = 0
        self.__in_dangerous_tag = 0

        if required_blocklisted_tags & blocklisted_tags:  # pragma: nocover
            raise ValueError("There is an overlap between %s and %s" % (
                required_blocklisted_tags, blocklisted_tags))
        self.tag_required_blocklist = required_blocklisted_tags
        self.tag_blocklist = blocklisted_tags

    def error(self, message):  # pragma: no cover
        """ Amusingly, Python's documentation doesn't mention that this
        function needs to be implemented in subclasses of the parent class
        of parser.HTMLParser. This was found by fuzzing,
        triggering the following exception:
            NotImplementedError: subclasses of ParserBase must override error()
        """
        raise ValueError(message)

    def __ensure_all_tags_closed(self):
        """Raise a ValueError if some opening tags were never closed."""
        if self.__validation_queue:
            raise ValueError("Some tags (%s) were left unclosed in %s" % (
                ', '.join(self.__validation_queue),
                self.filename))

    def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]):
        """Record the opening tag, and copy it to the output unless it is
        blocklisted or nested inside a blocklisted/required tag."""
        # Ignore the type, because mypy can't infer
        # that get_starttag_text() can't return None here.
        original_tag = self.get_starttag_text()  # type: ignore
        self.__validation_queue.append(original_tag)  # type: ignore

        # Incremented *before* the output check: a blocklisted tag is
        # itself dropped from the output.
        if tag in self.tag_blocklist:
            self.__in_dangerous_tag += 1

        if self.__in_dangerous_tag == 0:
            if self.__in_dangerous_but_required_tag == 0:
                self.__textrepr += original_tag

        # Incremented *after* the output check: a required tag is kept,
        # only what's inside it is dropped.
        if tag in self.tag_required_blocklist:
            self.__in_dangerous_but_required_tag += 1

    def handle_endtag(self, tag: str):
        """Check that `tag` closes the most recently opened tag, and copy
        the closing tag to the output when its opening one was copied.

        Raises:
            ValueError: if there is no opened tag left, or if the last
                opened tag isn't `tag`.
        """
        if not self.__validation_queue:
            raise ValueError("The closing tag %s doesn't have a corresponding "
                             "opening one in %s." % (tag, self.filename))

        previous_tag = self.__validation_queue.pop()
        previous_tag = previous_tag[1:-1]  # remove < and >
        # Remove the attributes. The tag name might be followed by any
        # whitespace (think `<div\nclass="x">`), not only by a space, so
        # splitting on ' ' would wrongly reject such well-formed tags.
        name_and_attributes = previous_tag.split(None, 1)
        previous_tag = name_and_attributes[0] if name_and_attributes else ''
        if tag != previous_tag.lower():
            raise ValueError("The closing tag %s doesn't match the previous "
                             "tag %s in %s" %
                             (tag, previous_tag, self.filename))

        # Mirror of handle_starttag: the counters are updated in the
        # reverse order around the output check.
        if tag in self.tag_required_blocklist:
            self.__in_dangerous_but_required_tag -= 1

        if self.__in_dangerous_tag == 0:
            if self.__in_dangerous_but_required_tag == 0:
                # There is no `get_endtag_text()` method :/
                self.__textrepr += '</' + previous_tag + '>'

        if tag in self.tag_blocklist:
            self.__in_dangerous_tag -= 1

    def handle_data(self, data: str):
        """Copy non-blank text to the output, escaped, unless it is
        located inside a blocklisted or required tag."""
        if self.__in_dangerous_but_required_tag == 0:
            if self.__in_dangerous_tag == 0:
                if data.strip():
                    self.__textrepr += escape(data)

    def handle_startendtag(self, tag: str,
                           attrs: List[Tuple[str, Optional[str]]]):
        """Handle self-closing tags (`<tag … />`).

        Blocklisted and required ones have their `name`/`content`
        attributes recorded as metadata; required ones are then written
        back without any attribute, while the others are copied verbatim.
        """
        if tag in self.tag_required_blocklist | self.tag_blocklist:
            meta = dict(attrs)
            name = meta.get('name', 'harmful metadata')
            content = meta.get('content', 'harmful data')
            self.__meta[name] = content

            if self.__in_dangerous_tag == 0:
                if tag in self.tag_required_blocklist:
                    self.__textrepr += '<' + tag + ' />'
            return

        if self.__in_dangerous_tag == 0:
            if self.__in_dangerous_but_required_tag == 0:
                self.__textrepr += self.get_starttag_text()

    def remove_all(self, output_filename: str) -> bool:
        """Write the cleaned document to `output_filename`, return True.

        Raises:
            ValueError: if some tags were left unclosed.
        """
        self.__ensure_all_tags_closed()
        with open(output_filename, 'w', encoding='utf-8') as f:
            f.write(self.__textrepr)
        return True

    def get_meta(self) -> Dict[str, Any]:
        """Return the metadata found in the self-closing blocklisted and
        required tags.

        Raises:
            ValueError: if some tags were left unclosed.
        """
        self.__ensure_all_tags_closed()
        return self.__meta

213
mat2
View File

@@ -1,132 +1,231 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import os
from typing import Tuple
import shutil
from typing import List, Set, Dict
import sys
import itertools
import mimetypes
import argparse
import multiprocessing
import logging
import unicodedata
import concurrent.futures
try:
from libmat2 import parser_factory, unsupported_extensions
except ValueError as e:
print(e)
from libmat2 import parser_factory, UNSUPPORTED_EXTENSIONS
from libmat2 import check_dependencies, UnknownMemberPolicy
except ValueError as ex:
print(ex)
sys.exit(1)
__version__ = '0.1.3'
__version__ = '0.13.5'
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING)
def __print_without_chars(s: str):
""" Remove control characters
We might use 'Cc' instead of 'C', but better safe than sorry
https://www.unicode.org/reports/tr44/#GC_Values_Table
"""
print(''.join(ch for ch in s if not unicodedata.category(ch).startswith('C')))
def __check_file(filename: str, mode: int = os.R_OK) -> bool:
if not os.path.exists(filename):
print("[-] %s is doesn't exist." % filename)
__print_without_chars("[-] %s doesn't exist." % filename)
return False
elif not os.path.isfile(filename):
print("[-] %s is not a regular file." % filename)
__print_without_chars("[-] %s is not a regular file." % filename)
return False
elif not os.access(filename, mode):
print("[-] %s is not readable and writeable." % filename)
mode_str: List[str] = list()
if mode & os.R_OK:
mode_str += 'readable'
if mode & os.W_OK:
mode_str += 'writeable'
__print_without_chars("[-] %s is not %s." % (filename, 'nor '.join(mode_str)))
return False
return True
def create_arg_parser():
def create_arg_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2')
parser.add_argument('files', nargs='*')
parser.add_argument('-v', '--version', action='version',
version='MAT2 %s' % __version__)
parser.add_argument('-l', '--list', action='store_true',
help='list all supported fileformats')
info = parser.add_mutually_exclusive_group()
info.add_argument('-c', '--check', action='store_true',
help='check if a file is free of harmful metadatas')
info.add_argument('-s', '--show', action='store_true',
help='list all the harmful metadata of a file without removing them')
info.add_argument('-L', '--lightweight', action='store_true',
help='remove SOME metadata')
parser.add_argument('-V', '--verbose', action='store_true',
help='show more verbose status information')
parser.add_argument('--unknown-members', metavar='policy', default='abort',
help='how to handle unknown members of archive-style '
'files (policy should be one of: %s) [Default: abort]' %
', '.join(p.value for p in UnknownMemberPolicy))
parser.add_argument('--inplace', action='store_true',
help='clean in place, without backup')
parser.add_argument('--no-sandbox', dest='sandbox', action='store_false',
default=True, help='Disable bubblewrap\'s sandboxing')
excl_group = parser.add_mutually_exclusive_group()
excl_group.add_argument('files', nargs='*', help='the files to process',
default=[])
excl_group.add_argument('-v', '--version', action='version',
version='mat2 %s' % __version__)
excl_group.add_argument('-l', '--list', action='store_true', default=False,
help='list all supported fileformats')
excl_group.add_argument('--check-dependencies', action='store_true',
default=False,
help='check if mat2 has all the dependencies it '
'needs')
excl_group = parser.add_mutually_exclusive_group()
excl_group.add_argument('-L', '--lightweight', action='store_true',
help='remove SOME metadata')
excl_group.add_argument('-s', '--show', action='store_true',
help='list harmful metadata detectable by mat2 '
'without removing them')
return parser
def show_meta(filename: str):
def show_meta(filename: str, sandbox: bool):
if not __check_file(filename):
return
p, mtype = parser_factory.get_parser(filename) # type: ignore
try:
p, mtype = parser_factory.get_parser(filename) # type: ignore
except ValueError as e:
__print_without_chars("[-] something went wrong when processing %s: %s" % (filename, e))
return
if p is None:
print("[-] %s's format (%s) is not supported" % (filename, mtype))
__print_without_chars("[-] %s's format (%s) is not supported" % (filename, mtype))
return
p.sandbox = sandbox
__print_meta(filename, p.get_meta())
def __print_meta(filename: str, metadata: Dict, depth: int = 1):
padding = " " * depth*2
if not metadata:
__print_without_chars(padding + "No metadata found in %s." % filename)
return
print("[+] Metadata for %s:" % filename)
for k, v in p.get_meta().items():
__print_without_chars("[%s] Metadata for %s:" % ('+'*depth, filename))
for (k, v) in sorted(metadata.items()):
if isinstance(v, dict):
__print_meta(k, v, depth+1)
continue
try: # FIXME this is ugly.
print(" %s: %s" % (k, v))
__print_without_chars(padding + " %s: %s" % (k, v))
except UnicodeEncodeError:
print(" %s: harmful content" % k)
__print_without_chars(padding + " %s: harmful content" % k)
except TypeError:
pass # for things that aren't iterable
def clean_meta(params: Tuple[str, bool]) -> bool:
filename, is_lightweigth = params
if not __check_file(filename, os.R_OK|os.W_OK):
def clean_meta(filename: str, is_lightweight: bool, inplace: bool, sandbox: bool,
policy: UnknownMemberPolicy) -> bool:
mode = (os.R_OK | os.W_OK) if inplace else os.R_OK
if not __check_file(filename, mode):
return False
p, mtype = parser_factory.get_parser(filename) # type: ignore
try:
p, mtype = parser_factory.get_parser(filename) # type: ignore
except ValueError as e:
__print_without_chars("[-] something went wrong when cleaning %s: %s" % (filename, e))
return False
if p is None:
print("[-] %s's format (%s) is not supported" % (filename, mtype))
__print_without_chars("[-] %s's format (%s) is not supported" % (filename, mtype))
return False
if is_lightweigth:
return p.remove_all_lightweight()
return p.remove_all()
p.unknown_member_policy = policy
p.lightweight_cleaning = is_lightweight
p.sandbox = sandbox
try:
logging.debug('Cleaning %s…', filename)
ret = p.remove_all()
if ret is True:
shutil.copymode(filename, p.output_filename)
if inplace is True:
os.rename(p.output_filename, filename)
return ret
except RuntimeError as e:
__print_without_chars("[-] %s can't be cleaned: %s" % (filename, e))
return False
def show_parsers():
print('[+] Supported formats:')
formats = list()
for parser in parser_factory._get_parsers():
formats = set() # Set[str]
for parser in parser_factory._get_parsers(): # type: ignore
for mtype in parser.mimetypes:
extensions = set()
extensions = set() # Set[str]
for extension in mimetypes.guess_all_extensions(mtype):
if extension[1:] not in unsupported_extensions: # skip the dot
if extension not in UNSUPPORTED_EXTENSIONS:
extensions.add(extension)
if not extensions:
# we're not supporting a single extension in the current
# mimetype, so there is not point in showing the mimetype at all
continue
formats.append(' - %s (%s)' % (mtype, ', '.join(extensions)))
formats.add(' - %s (%s)' % (mtype, ', '.join(extensions)))
print('\n'.join(sorted(formats)))
def __get_files_recursively(files):
def __get_files_recursively(files: List[str]) -> List[str]:
ret: Set[str] = set()
for f in files:
if os.path.isdir(f):
for path, _, _files in os.walk(f):
for _f in _files:
fname = os.path.join(path, _f)
if __check_file(fname):
yield fname
ret.add(fname)
elif __check_file(f):
yield f
ret.add(f)
return list(ret)
def main():
def main() -> int:
arg_parser = create_arg_parser()
args = arg_parser.parse_args()
if args.verbose:
logging.getLogger(__name__).setLevel(logging.DEBUG)
if not args.files:
if not args.list:
return arg_parser.print_help()
show_parsers()
if args.list:
show_parsers()
return 0
elif args.check_dependencies:
__print_without_chars("Dependencies for mat2 %s:" % __version__)
for key, value in sorted(check_dependencies().items()):
__print_without_chars('- %s: %s %s' % (key, 'yes' if value['found'] else 'no',
'(optional)' if not value['required'] else ''))
else:
arg_parser.print_help()
return 0
elif args.show:
for f in __get_files_recursively(args.files):
show_meta(f)
show_meta(f, args.sandbox)
return 0
else:
p = multiprocessing.Pool()
mode = (args.lightweight is True)
l = zip(__get_files_recursively(args.files), itertools.repeat(mode))
inplace = args.inplace
policy = UnknownMemberPolicy(args.unknown_members)
if policy == UnknownMemberPolicy.KEEP:
logging.warning('Keeping unknown member files may leak metadata in the resulting file!')
no_failure = True
files = __get_files_recursively(args.files)
# We have to use Processes instead of Threads, since
# we're using tempfile.mkdtemp, which isn't thread-safe.
futures = list()
with concurrent.futures.ProcessPoolExecutor() as executor:
for f in files:
future = executor.submit(clean_meta, f, args.lightweight,
inplace, args.sandbox, policy)
futures.append(future)
for future in concurrent.futures.as_completed(futures):
no_failure &= future.result()
return 0 if no_failure is True else -1
ret = list(p.imap_unordered(clean_meta, list(l)))
return 0 if all(ret) else -1
if __name__ == '__main__':
sys.exit(main())

View File

@@ -1,29 +0,0 @@
#!/usr/bin/env python3
import gi
gi.require_version('Nautilus', '3.0')
from gi.repository import Nautilus, GObject
class ColumnExtension(GObject.GObject, Nautilus.MenuProvider):
def menu_activate_cb(self, menu, file):
print "menu_activate_cb", file
# TODO: clean metadata here
def get_background_items(self, window, file):
""" https://bugzilla.gnome.org/show_bug.cgi?id=784278 """
return None
def get_file_items(self, window, files):
if len(files) != 1: # we're not supporting multiple files for now
return
file = files[0]
item = Nautilus.MenuItem(
name="MAT2::Remove_metadata",
label="Remove metadata from %s" % file.get_name(),
tip="Remove metadata from %s" % file.get_name()
)
item.connect('activate', self.menu_activate_cb, file)
return [item]

21
pyproject.toml Normal file
View File

@@ -0,0 +1,21 @@
[project]
name = "mat2"
version = "0.13.5"
description = "mat2 is a metadata removal tool, supporting a wide range of commonly used file formats, written in python3: at its core, it's a library, used by an eponymous command-line interface, as well as several file manager extensions."
readme = "README.md"
license = {file = "LICENSE"}
requires-python = ">=3.9"
dependencies = [
'mutagen',
'PyGObject',
'pycairo',
]
[project.urls]
Repository = "https://github.com/jvoisin/mat2"
Issues = "https://github.com/jvoisin/mat2/issues"
Changelog = "https://github.com/jvoisin/mat2/blob/master/CHANGELOG.md"
[tool.ruff]
target-version = "py39"
# E501 Line too long
ignore = ["E501", "F401", "E402", "E722"]

View File

@@ -1,17 +1,17 @@
import setuptools
with open("README.md", "r") as fh:
with open("README.md", encoding='utf-8') as fh:
long_description = fh.read()
setuptools.setup(
name="mat2",
version='0.1.3',
version='0.13.5',
author="Julien (jvoisin) Voisin",
author_email="julien.voisin+mat2@dustri.org",
description="A handy tool to trash your metadata",
long_description=long_description,
long_description_content_type="text/markdown",
url="https://0xacab.org/jvoisin/mat2",
url="https://github.com/jvoisin/mat2",
python_requires = '>=3.5.0',
scripts=['mat2'],
install_requires=[
@@ -20,7 +20,8 @@ setuptools.setup(
'pycairo',
],
packages=setuptools.find_packages(exclude=('tests', )),
classifiers=(
data_files = [('share/man/man1', ['doc/mat2.1'])],
classifiers=[
"Development Status :: 3 - Alpha",
"Environment :: Console",
"License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)",
@@ -28,8 +29,8 @@ setuptools.setup(
"Programming Language :: Python :: 3 :: Only",
"Topic :: Security",
"Intended Audience :: End Users/Desktop",
),
],
project_urls={
'bugtacker': 'https://0xacab.org/jvoisin/mat2/issues',
'bugtacker': 'https://github.com/jvoisin/mat2/issues',
},
)

Binary file not shown.

BIN
tests/data/comment.docx Normal file

Binary file not shown.

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.9 KiB

BIN
tests/data/dirty.aiff Normal file

Binary file not shown.

BIN
tests/data/dirty.avi Normal file

Binary file not shown.

14
tests/data/dirty.css Normal file
View File

@@ -0,0 +1,14 @@
/**
* This is my super css framework
* version: 1.0
* author : jvoisin
*/
body {
color: red;
background-color: blue;
}
.underline {
text-decoration: underline; /* underline is cool */
}

BIN
tests/data/dirty.epub Normal file

Binary file not shown.

Binary file not shown.

BIN
tests/data/dirty.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

BIN
tests/data/dirty.heic Normal file

Binary file not shown.

14
tests/data/dirty.html Normal file
View File

@@ -0,0 +1,14 @@
<html>
<head>
<meta content="vim" name="generator"/>
<meta content="jvoisin" name="author"/>
</head>
<body>
<p>
<h1>Hello</h1>
I am a web page.
Please <b>love</b> me.
Here, have a pretty picture: <img src='dirty.jpg' alt='a pretty picture'/>
</p>
</body>
</html>

BIN
tests/data/dirty.mp4 Normal file

Binary file not shown.

8
tests/data/dirty.ppm Normal file
View File

@@ -0,0 +1,8 @@
P3
# A metadata
3 2 1
1 0 1 0 1 0 0 0 1
# And an other one
1 1 0 1 0 1 1 0 0
# and a final one here

636
tests/data/dirty.svg Normal file
View File

@@ -0,0 +1,636 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/2000/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
inkscape:export-ydpi="384"
inkscape:export-xdpi="384"
inkscape:export-filename="mat2.png"
width="128"
height="128"
id="svg11300"
sodipodi:version="0.32"
inkscape:version="0.92.3 (2405546, 2018-03-11)"
sodipodi:docname="dirty.svg"
inkscape:output_extension="org.inkscape.output.svg.inkscape"
version="1.0"
style="display:inline;enable-background:new"
viewBox="0 0 128 128">
<script
id="script4600" />
<title
id="title4162">Adwaita Icon Template</title>
<defs
id="defs3" />
<sodipodi:namedview
stroke="#ef2929"
fill="#f57900"
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="0.25490196"
inkscape:pageopacity="0.0"
inkscape:pageshadow="2"
inkscape:zoom="4.0446508"
inkscape:cx="61.536232"
inkscape:cy="41.548134"
inkscape:current-layer="layer1"
showgrid="true"
inkscape:grid-bbox="true"
inkscape:document-units="px"
inkscape:showpageshadow="false"
inkscape:window-width="1366"
inkscape:window-height="747"
inkscape:window-x="0"
inkscape:window-y="21"
width="400px"
height="300px"
inkscape:snap-nodes="true"
inkscape:snap-bbox="false"
objecttolerance="7"
gridtolerance="12"
guidetolerance="13"
inkscape:window-maximized="1"
inkscape:pagecheckerboard="false"
showguides="true"
inkscape:guide-bbox="true"
inkscape:locked="false"
inkscape:measure-start="0,0"
inkscape:measure-end="0,0"
inkscape:object-nodes="true"
inkscape:bbox-nodes="true"
inkscape:snap-global="true"
inkscape:object-paths="true"
inkscape:snap-intersection-paths="true"
inkscape:snap-bbox-edge-midpoints="true"
inkscape:snap-bbox-midpoints="true"
showborder="false"
inkscape:snap-center="true"
inkscape:snap-object-midpoints="true"
inkscape:snap-midpoints="true"
inkscape:snap-smooth-nodes="true">
<inkscape:grid
type="xygrid"
id="grid5883"
spacingx="2"
spacingy="2"
enabled="true"
visible="true"
empspacing="4"
originx="0"
originy="0" />
<sodipodi:guide
position="64,8"
orientation="0,1"
id="guide1073"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="12,64"
orientation="1,0"
id="guide1075"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,104"
orientation="0,1"
id="guide1099"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,128"
orientation="0,1"
id="guide993"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="104,64"
orientation="1,0"
id="guide995"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="9.2651362e-08,64"
orientation="1,0"
id="guide867"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="120,64"
orientation="1,0"
id="guide869"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,116"
orientation="0,1"
id="guide871"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<inkscape:grid
type="xygrid"
id="grid873"
spacingx="1"
spacingy="1"
empspacing="8"
color="#000000"
opacity="0.49019608"
empcolor="#000000"
empopacity="0.08627451"
dotted="true" />
<sodipodi:guide
position="24,64"
orientation="1,0"
id="guide877"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="116,64"
orientation="1,0"
id="guide879"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,120"
orientation="0,1"
id="guide881"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,12"
orientation="0,1"
id="guide883"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="8,64"
orientation="1,0"
id="guide885"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="128,64"
orientation="1,0"
id="guide887"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,0"
orientation="0,1"
id="guide897"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,24"
orientation="0,1"
id="guide899"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="256,256"
orientation="-0.70710678,0.70710678"
id="guide950"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,64"
orientation="0.70710678,0.70710678"
id="guide952"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
</sodipodi:namedview>
<metadata
id="metadata4">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:creator>
<cc:Agent>
<dc:title>GNOME Design Team</dc:title>
</cc:Agent>
</dc:creator>
<dc:source>mat2's source code</dc:source>
<cc:license
rdf:resource="http://creativecommons.org/licenses/by-sa/4.0/" />
<dc:title>Adwaita Icon Template</dc:title>
<dc:subject>
<rdf:Bag>
<rdf:li>mat2</rdf:li>
<rdf:li>logo</rdf:li>
<rdf:li>metadata</rdf:li>
</rdf:Bag>
</dc:subject>
<dc:date>2019 07 13</dc:date>
<dc:rights>
<cc:Agent>
<dc:title>LGPL</dc:title>
</cc:Agent>
</dc:rights>
<dc:publisher>
<cc:Agent>
<dc:title>jvoisin</dc:title>
</cc:Agent>
</dc:publisher>
<dc:identifier>mat2-testdata-svg</dc:identifier>
<dc:relation />
<dc:language>English</dc:language>
<dc:coverage />
<dc:description>This is a test svg image for mat2's testsuite</dc:description>
<dc:contributor>
<cc:Agent>
<dc:title>jvoisin, and Rose for the design</dc:title>
</cc:Agent>
</dc:contributor>
</cc:Work>
<cc:License
rdf:about="http://creativecommons.org/licenses/by-sa/4.0/">
<cc:permits
rdf:resource="http://creativecommons.org/ns#Reproduction" />
<cc:permits
rdf:resource="http://creativecommons.org/ns#Distribution" />
<cc:requires
rdf:resource="http://creativecommons.org/ns#Notice" />
<cc:requires
rdf:resource="http://creativecommons.org/ns#Attribution" />
<cc:permits
rdf:resource="http://creativecommons.org/ns#DerivativeWorks" />
<cc:requires
rdf:resource="http://creativecommons.org/ns#ShareAlike" />
</cc:License>
</rdf:RDF>
</metadata>
<g
id="layer1"
inkscape:label="Icon"
inkscape:groupmode="layer"
style="display:inline"
transform="translate(0,-172)">
<g
inkscape:groupmode="layer"
id="layer2"
inkscape:label="baseplate"
style="display:none">
<text
xml:space="preserve"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.33333349px;line-height:125%;font-family:Cantarell;-inkscape-font-specification:'Cantarell, Normal';text-align:start;writing-mode:lr-tb;text-anchor:start;display:inline;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.33264872;enable-background:new"
x="7.9499588"
y="148.65199"
id="context"
inkscape:label="context"><tspan
sodipodi:role="line"
id="tspan2716"
x="7.9499588"
y="148.65199"
style="font-size:5.33333349px;stroke-width:0.33264872">apps</tspan></text>
<text
inkscape:label="icon-name"
id="text3021"
y="157.23398"
x="7.7533054"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.33333349px;line-height:125%;font-family:Cantarell;-inkscape-font-specification:'Cantarell, Bold';text-align:start;writing-mode:lr-tb;text-anchor:start;display:inline;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.33264872;enable-background:new"
xml:space="preserve"><tspan
y="157.23398"
x="7.7533054"
id="tspan3023"
sodipodi:role="line"
style="font-size:5.33333349px;stroke-width:0.33264872">org.gnome.</tspan></text>
<g
style="display:inline;fill:#000000;enable-background:new"
transform="matrix(7.9911709,0,0,8.0036407,-167.7909,-4846.0776)"
id="g12027"
inkscape:export-xdpi="12"
inkscape:export-ydpi="12" />
<rect
style="display:inline;overflow:visible;visibility:visible;fill:#f0f0f0;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.5;marker:none;enable-background:accumulate"
id="rect13805"
width="128"
height="128"
x="9.2651362e-08"
y="172"
inkscape:label="512x512" />
<g
id="g883"
style="fill:none;fill-opacity:0.25098039;stroke:#a579b3;stroke-opacity:1"
transform="translate(-24,24)" />
<g
id="g900"
style="fill:none;fill-opacity:0.25098039;stroke:#a579b3;stroke-opacity:1"
transform="translate(-24,24)" />
<g
id="g1168"
transform="matrix(0.25,0,0,0.25,6.9488522e-8,225)">
<circle
cx="256"
cy="44"
r="240"
id="path1142"
style="opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal" />
<rect
ry="32"
rx="32"
y="-180"
x="96"
height="448"
width="319.99979"
id="rect1110"
style="opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal" />
<rect
ry="32"
rx="32"
y="-164"
x="48"
height="416"
width="416"
id="rect1110-8"
style="display:inline;opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal;enable-background:new" />
<rect
ry="32"
rx="32"
y="-116"
x="32"
height="320"
width="448"
id="rect1110-8-9"
style="display:inline;opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal;enable-background:new" />
</g>
</g>
<g
inkscape:groupmode="layer"
id="layer9"
inkscape:label="hires"
style="display:none" />
<g
id="g944"
transform="matrix(1,0,0,0.93868822,0,14.545966)">
<path
style="fill:#99c1f1;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.41013032;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 50.53899,195.25817 6.396029,-11.43484 1.082405,-0.87215 4.821622,-10.46578 0.885604,-0.38763 2.558412,4.74837 2.755213,9.59364 1.672808,1.35667 3.542417,-0.87215 5.707227,12.59771 12.988859,9.59364 3.050415,3.87621 v 2.71335 l -16.334476,-1.25977 -7.084833,1.45359 -4.428021,-0.38763 -7.084833,0.29072 -11.414452,-0.58143 -3.640817,0.96905 -9.052843,-1.64739 -2.066409,0.0969 -1.476008,-0.48452 1.377607,-1.45358 1.869609,-1.06596 6.002428,-11.04722 1.279206,0.48453 5.412025,-6.49267 z"
id="path3455"
inkscape:connector-curvature="0" />
<path
style="fill:#241f31;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 49.358184,215.31759 -3.444016,0.9206 -9.003641,-1.74429 -1.918809,0.24226 -1.623608,-0.58143 1.574407,-1.50204 1.722008,-0.96905 5.953228,-11.09567 1.279205,0.53298 5.510426,-6.54112 0.344401,0.29072 -4.969223,10.27197 2.214011,1.93811 -0.246001,4.45765 z"
id="path3459"
inkscape:connector-curvature="0" />
<path
style="fill:#241f31;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 50.438601,195.22912 6.470906,-11.5803 1.113274,-0.6167 4.870575,-10.62099 0.904535,-0.41113 -0.417479,3.3576 0.626218,0.89079 0.834954,15.89722 1.391594,3.70021 -3.687722,5.34476 0.208739,1.37044 -0.347898,5.68737 1.87865,3.28908 7.375442,2.19272 1.252433,2.19272 -0.487057,0.13704 -4.244358,-0.54818 -6.540486,0.41114 -2.435287,-2.19272 -0.626216,-4.24839 -2.087389,-6.16703 -4.035619,-3.42612 -2.087388,-4.38544"
id="path3461"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 32.069579,218.11563 c -0.06958,-0.27409 0.695796,-1.23341 0.695796,-1.23341 l 2.783185,-0.0685 1.739491,2.26124 4.661836,5.13919 0.139158,1.57602 -4.174778,5.96145 -0.487057,6.16703 -2.922344,2.26124 -0.06958,1.57601 h -1.113274 l -1.322013,-3.08351 2.017809,-14.86938 z"
id="path3400"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 48.83827,222.43255 1.600331,-3.01499 -0.695796,-0.75375 -5.635951,-1.16488 -3.200663,0.82227 -0.06958,1.50749 1.53075,0.75375 1.461174,2.67237 -0.208739,1.71307 1.739489,1.02783 2.296129,-0.54818 z"
id="path3402"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 51.203977,217.70449 1.113274,-0.68522 2.365707,1.02784 1.322013,2.67237 -2.226548,2.26125 -1.322013,-0.82227 -1.322013,-0.61671 0.834956,-1.71306 z"
id="path3404"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 43.758957,226.61242 1.948228,0.68522 0.417479,1.91863 -0.626216,1.30193 -1.182854,0.34261 -1.113275,1.02784 -0.765376,3.63169 0.626218,3.01499 -1.252435,0.68522 -0.487057,-0.41113 -0.278319,-1.5075 -1.80907,-1.37045 -0.765376,-3.49464 3.618141,-3.42613 1.669912,-2.67237"
id="path3406"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 50.57776,223.25481 0.13916,0.68523 -2.783187,3.83726 0.06958,1.64454 -0.626218,1.50749 -1.60033,1.43897 -0.06958,0.75375 1.600333,1.91863 1.182854,3.08351 0.974114,0.68523 1.669911,-2.80942 -0.278318,-3.22056 3.966039,-3.3576 0.695796,-1.09636 -3.270243,-4.45396 z"
id="path3408"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 51.839954,236.39856 -0.834826,1.58948 0.166966,1.26061 1.057445,1.97315 0.500896,-0.32886 0.389584,-1.7539 1.447031,-1.151 2.337512,-4.0559 -0.22262,-1.04138 -1.947927,-1.69909 -2.114892,1.31542 0.278276,3.39819 z"
id="path3410"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 57.593778,229.84236 -1.043694,1.09636 0.765375,0.89079 1.043695,-0.20556 v -1.43898 z"
id="path3412"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 59.344793,218.25267 -0.765376,2.19272 -0.695796,0.27409 -0.695796,1.91863 -2.226548,2.26124 2.574446,3.56317 h 1.182854 l 0.487057,0.75375 0.626217,1.09636 1.948229,1.30193 2.922346,-0.6167 1.53075,-2.26125 -1.043694,-3.3576 -1.043693,-1.64454 1.322011,-2.60385 -0.904535,-1.37045 -2.226548,0.0685 z"
id="path3416"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 72.150522,238.17554 -0.518261,1.78635 1.036524,2.16915 1.684349,-2.04155 -0.647826,-2.16915 z"
id="path3418"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 66.789813,223.66595 1.600333,-0.75375 1.739489,-4.11135 2.922346,0.75375 1.322013,0.41114 0.139159,6.7152 -1.461172,1.02784 -2.226548,4.17987 -0.834956,-0.41114 -0.626216,0.95932 -2.574448,-0.61671 0.904537,-3.08351 z"
id="path3422"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 77.505077,218.59529 1.182854,-0.20557 2.435287,1.30193 -0.974115,1.02783 -2.087389,3.63169 -1.391593,0.0685 -1.113274,-0.61671 1.043695,-2.19271 z"
id="path3426"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 73.539038,231.06638 1.043695,-1.30193 1.043694,-2.80942 4.522676,1.71306 -0.974115,2.87795 -1.94823,-0.41114 -1.80907,1.09636 z"
id="path3428"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 78.200873,225.6531 7.932079,-7.94861 3.339822,1.09636 0.974115,0.13705 1.600331,-1.02784 3.339822,0.0685 -5.079314,12.81371 -3.200663,-1.98715 0.139161,-1.16489 -0.695798,-0.6167 -0.208737,-1.16488 -1.043696,0.27409 -3.200663,2.39829 z"
id="path3430"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 81.401536,230.99786 c 0,-0.2741 2.156968,-1.98716 2.156968,-1.98716 l 2.017811,1.30193 -0.904535,2.32976 -1.182855,0.75375 z"
id="path3432"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 81.679855,237.8501 0.765375,-1.91863 0.208739,-1.2334 2.156969,0.20557 2.156968,-2.87795 3.409403,1.02784 -0.904535,2.80942 -0.904535,0.34261 -0.626218,2.80943 1.043694,4.72805 -0.904535,1.09636 -1.80907,-2.19272 -0.626217,-1.37045 z"
id="path3434"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 78.131294,238.60385 0.626216,3.08351 -0.626216,3.22056 0.765375,0.95931 -0.626216,5.68737 2.504866,2.32976 1.87865,-0.47965 0.417478,-3.35761 1.669911,-0.0685 3.757301,-1.8501 -0.20874,-1.98716 -2.226548,-0.20556 -1.182854,-3.01499 -3.200662,-2.05568 -1.252434,-2.39828 z"
id="path3436"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 84.532619,251.41755 -0.278318,1.43898 -0.695797,0.6167 1.322013,2.67238 2.365709,-0.20557 1.53075,-2.94647 -2.365707,-1.98715 z"
id="path3438"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 64.993183,249.51154 -1.14959,2.51583 0.766392,1.69818 2.618509,0.25159 0.702526,1.19502 1.021857,2.39003 -0.574794,2.32714 3.89583,1.88688 0.95799,-1.06923 0.510928,-4.59139 -4.023561,-2.70451 -0.127732,-4.21402 z"
id="path3440"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 72.912822,251.00642 h 1.391592 l 2.574446,0.75375 1.391593,1.98715 1.461172,1.30193 -0.139159,3.42612 -3.409402,1.57602 -0.974115,-1.85011 0.626217,-3.3576 -3.270243,-1.85011 z"
id="path3442"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 72.147446,264.77944 1.80907,-1.98715 3.339822,-1.85011 1.322013,-0.0685 4.661835,-3.63169 1.391594,0.34261 0.556637,4.52248 -3.200664,4.04283 -2.852765,-0.82227 -1.80907,0.54818 -0.765376,1.43897 -2.087389,0.68522 z"
id="path3444"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 75.06979,272.93361 0.765376,-1.30192 1.252433,-0.41114 0.904535,-2.87794 1.94823,-0.61671 0.556637,2.60386 -3.339822,6.0985 -1.391593,-0.0685 z"
id="path3446"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 71.451649,268.20556 -1.252433,1.85011 2.504867,1.98715 0.765376,0.82227 1.73949,-2.39829 -2.296127,-2.80942 -1.461173,0.27409 z"
id="path3448"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 62.24531,254.0948 1.461172,1.02784 1.948229,0.54818 0.487058,1.64454 -1.461173,2.67237 -0.06958,1.78159 -1.669911,1.85011 -1.252433,-2.05568 0.487057,-2.80942 -1.391593,-0.34261 -0.904535,-2.80942 z"
id="path3450"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 47.585836,246.55246 -0.695796,3.70021 -0.139159,1.37045 1.87865,0.68523 1.391592,0.95931 1.809071,-1.64454 -0.417478,-0.95931 z"
id="path3452"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 54.682958,247.78586 -1.043694,1.02784 0.208739,1.98715 1.600331,0.89079 0.626217,-0.47965 0.06958,-2.26125 z"
id="path3454"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 48.629531,258.95503 4.800994,-6.16703 3.409402,0.82227 0.556637,1.78159 3.131083,4.79657 -1.669911,5.82441 -3.200663,-1.37045 -0.417478,-3.49464 -2.087388,1.30192 z"
id="path3456"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 45.915924,252.71948 -0.487056,1.98715 1.60033,1.57602 1.461174,-0.20557 -0.347899,-2.19272 z"
id="path3458"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 67.555189,261.6274 -1.80907,2.80943 -2.435287,8.42826 2.783185,3.76874 1.461172,-0.0685 1.113274,-2.12419 1.043696,-0.20557 0.487057,-1.09636 -1.043694,-4.45396 1.182853,-4.31692 z"
id="path3460"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 58.718577,267.79443 1.600331,-1.23341 2.017809,1.71306 -0.904535,1.85011 z"
id="path3462"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 58.509838,276.49678 2.156968,-4.591 1.391593,-0.27409 0.834955,1.50749 -2.017809,5.13919 z"
id="path3464"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 71.242911,274.02997 1.391592,0.20557 1.043694,3.01499 2.01781,0.68522 1.530751,1.57602 -0.904535,2.87795 -2.365707,2.32976 -0.139159,3.56317 -1.322013,1.98715 -2.504867,-1.85011 -0.278318,-2.67237 -1.530752,-1.78159 -1.113274,-3.08351 3.61814,-4.17987 z"
id="path3466"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 62.893354,276.5653 3.270244,1.16489 0.06958,3.70021 -0.556637,0.68523 0.974115,3.70021 1.252433,1.64454 0.06958,3.08351 -2.017809,1.37045 -2.574447,8.08566 -2.574447,-1.30193 -1.948229,-9.79872 z"
id="path3468"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 58.440258,283.5546 h 0.556637 l 0.417478,0.95931 -0.208739,1.30193 -1.461172,0.13704 z"
id="path3472"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 56.700767,279.16916 -1.113274,0.95931 0.834956,2.80943 1.600331,0.20556 0.487058,-2.05567 -0.695796,-1.91863 z"
id="path3474"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 53.152207,272.17987 0.139159,5.13918 1.87865,1.23341 0.834955,-0.54818 0.904535,-3.63169 1.530752,-1.57602 -1.669911,-3.97431 -3.548561,3.08352 z"
id="path3476"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 45.915924,258.33832 -0.208739,3.83726 -4.731414,3.97431 1.948229,2.80942 8.488716,0.82227 0.417478,1.98715 1.043694,-0.75375 0.487057,-2.19272 1.182854,-1.64454 -0.417478,-1.09635 -1.87865,-2.60386 -3.757299,-1.37045 -1.461174,-3.22056 z"
id="path3480"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 40.279975,263.68308 1.669912,0.6167 3.061502,-6.37259 -0.904535,-5.61884 -2.504867,-0.34262 -1.391592,-1.2334 2.156968,-7.606 -2.087388,-4.45396 -3.409402,1.57602 -0.834956,3.42612 -1.87865,0.20557 -0.347898,2.1242 1.530752,1.64454 h 1.322013 l 0.626217,3.90578 2.296127,5.61884 -0.347898,2.19272 z"
id="path3482"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 66.531337,247.61066 -0.590018,-0.31657 -0.420783,-1.71262 0.427793,-0.66945 1.306823,-1.13114 2.316342,-1.38746 1.06612,0.23465 -0.01701,2.21105 -2.36166,3.35302 z"
id="path4284"
inkscape:connector-curvature="0"
inkscape:transform-center-x="4.9927099"
inkscape:transform-center-y="-9.3161687" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 72.373733,232.22199 -0.815102,1.03206 4.017286,4.12827 1.571981,0.17201 1.339096,-0.86006 0.931544,0.63071 2.387083,-2.98152 -2.794634,-0.91739 -3.027519,0.22934 z"
id="path3601"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 57.407878,237.1102 -1.301737,2.34289 -1.301738,0.61888 -0.17955,1.45878 -4.488748,1.54719 -0.403989,1.50299 0.314213,0.30944 1.032412,0.0884 v 1.41457 l 1.660839,1.50299 2.154598,-1.94504 1.571064,0.35364 2.738136,-1.94504 -1.436399,-2.56392 0.987525,-3.44803 -0.583538,-1.37037 z"
id="path3603"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 62.104217,246.96003 5.843936,-6.55723 0.659867,-2.66044 2.221783,-0.40757 -0.386451,-3.39556 -2.000988,-0.60704 -6.246127,-0.36572 -2.624948,2.5137 1.519708,2.75102 -0.347742,5.51876 z"
id="path3605"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 71.024647,249.63275 5.822153,1.31875 1.047988,-3.89891 -1.280874,-1.43343 0.523995,-6.02038 -3.551515,5.275 0.34933,2.06413 -2.037753,0.80272 -1.164431,0.45869 z"
id="path3607"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 59.099222,247.24427 -2.095974,1.72011 -0.05822,1.60543 0.465772,1.72011 1.455539,0.97473 -0.407551,0.97473 2.328861,-0.34402 2.27064,-2.86685 -1.571981,-0.57337 -0.640437,-2.86685 -1.51376,-0.40136 z"
id="path3609"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 44.078067,234.34346 0.291107,4.47228 -1.863089,1.43342 2.095976,3.72691 2.037753,0.0573 2.27064,-3.55489 -2.969297,-4.98831 z"
id="path3611"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 44.660282,245.46683 -3.318627,4.30027 1.339096,1.26141 2.561747,-0.28668 1.222652,-3.15354 z"
id="path3613"
inkscape:connector-curvature="0" />
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 34 KiB

BIN
tests/data/dirty.wav Normal file

Binary file not shown.

BIN
tests/data/dirty.webp Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 38 KiB

BIN
tests/data/dirty.wmv Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

636
tests/data/weird.svg Normal file
View File

@@ -0,0 +1,636 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
xmlns:dc="http://purl.org/dc/elements/1.1/"
xmlns:cc="http://creativecommons.org/ns#"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:svg="http://www.w3.org/2000/svg"
xmlns="http://www.w3.org/1337/svg"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
inkscape:export-ydpi="384"
inkscape:export-xdpi="384"
inkscape:export-filename="mat2.png"
width="128"
height="128"
id="svg11300"
sodipodi:version="0.32"
inkscape:version="0.92.3 (2405546, 2018-03-11)"
sodipodi:docname="dirty.svg"
inkscape:output_extension="org.inkscape.output.svg.inkscape"
version="1.0"
style="display:inline;enable-background:new"
viewBox="0 0 128 128">
<script
id="script4600" />
<title
id="title4162">Adwaita Icon Template</title>
<defs
id="defs3" />
<sodipodi:namedview
stroke="#ef2929"
fill="#f57900"
id="base"
pagecolor="#ffffff"
bordercolor="#666666"
borderopacity="0.25490196"
inkscape:pageopacity="0.0"
inkscape:pageshadow="2"
inkscape:zoom="4.0446508"
inkscape:cx="61.536232"
inkscape:cy="41.548134"
inkscape:current-layer="layer1"
showgrid="true"
inkscape:grid-bbox="true"
inkscape:document-units="px"
inkscape:showpageshadow="false"
inkscape:window-width="1366"
inkscape:window-height="747"
inkscape:window-x="0"
inkscape:window-y="21"
width="400px"
height="300px"
inkscape:snap-nodes="true"
inkscape:snap-bbox="false"
objecttolerance="7"
gridtolerance="12"
guidetolerance="13"
inkscape:window-maximized="1"
inkscape:pagecheckerboard="false"
showguides="true"
inkscape:guide-bbox="true"
inkscape:locked="false"
inkscape:measure-start="0,0"
inkscape:measure-end="0,0"
inkscape:object-nodes="true"
inkscape:bbox-nodes="true"
inkscape:snap-global="true"
inkscape:object-paths="true"
inkscape:snap-intersection-paths="true"
inkscape:snap-bbox-edge-midpoints="true"
inkscape:snap-bbox-midpoints="true"
showborder="false"
inkscape:snap-center="true"
inkscape:snap-object-midpoints="true"
inkscape:snap-midpoints="true"
inkscape:snap-smooth-nodes="true">
<inkscape:grid
type="xygrid"
id="grid5883"
spacingx="2"
spacingy="2"
enabled="true"
visible="true"
empspacing="4"
originx="0"
originy="0" />
<sodipodi:guide
position="64,8"
orientation="0,1"
id="guide1073"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="12,64"
orientation="1,0"
id="guide1075"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,104"
orientation="0,1"
id="guide1099"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,128"
orientation="0,1"
id="guide993"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="104,64"
orientation="1,0"
id="guide995"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="9.2651362e-08,64"
orientation="1,0"
id="guide867"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="120,64"
orientation="1,0"
id="guide869"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,116"
orientation="0,1"
id="guide871"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<inkscape:grid
type="xygrid"
id="grid873"
spacingx="1"
spacingy="1"
empspacing="8"
color="#000000"
opacity="0.49019608"
empcolor="#000000"
empopacity="0.08627451"
dotted="true" />
<sodipodi:guide
position="24,64"
orientation="1,0"
id="guide877"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="116,64"
orientation="1,0"
id="guide879"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,120"
orientation="0,1"
id="guide881"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,12"
orientation="0,1"
id="guide883"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="8,64"
orientation="1,0"
id="guide885"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="128,64"
orientation="1,0"
id="guide887"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,0"
orientation="0,1"
id="guide897"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,24"
orientation="0,1"
id="guide899"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="256,256"
orientation="-0.70710678,0.70710678"
id="guide950"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
<sodipodi:guide
position="64,64"
orientation="0.70710678,0.70710678"
id="guide952"
inkscape:locked="false"
inkscape:label=""
inkscape:color="rgb(0,0,255)" />
</sodipodi:namedview>
<metadata
id="metadata4">
<rdf:RDF>
<cc:Work
rdf:about="">
<dc:format>image/svg+xml</dc:format>
<dc:type
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
<dc:creator>
<cc:Agent>
<dc:title>GNOME Design Team</dc:title>
</cc:Agent>
</dc:creator>
<dc:source>mat2's source code</dc:source>
<cc:license
rdf:resource="http://creativecommons.org/licenses/by-sa/4.0/" />
<dc:title>Adwaita Icon Template</dc:title>
<dc:subject>
<rdf:Bag>
<rdf:li>mat2</rdf:li>
<rdf:li>logo</rdf:li>
<rdf:li>metadata</rdf:li>
</rdf:Bag>
</dc:subject>
<dc:date>2019 07 13</dc:date>
<dc:rights>
<cc:Agent>
<dc:title>LGPL</dc:title>
</cc:Agent>
</dc:rights>
<dc:publisher>
<cc:Agent>
<dc:title>jvoisin</dc:title>
</cc:Agent>
</dc:publisher>
<dc:identifier>mat2-testdata-svg</dc:identifier>
<dc:relation />
<dc:language>English</dc:language>
<dc:coverage />
<dc:description>This is a test svg image for mat2's testsuite</dc:description>
<dc:contributor>
<cc:Agent>
<dc:title>jvoisin, and Rose for the design</dc:title>
</cc:Agent>
</dc:contributor>
</cc:Work>
<cc:License
rdf:about="http://creativecommons.org/licenses/by-sa/4.0/">
<cc:permits
rdf:resource="http://creativecommons.org/ns#Reproduction" />
<cc:permits
rdf:resource="http://creativecommons.org/ns#Distribution" />
<cc:requires
rdf:resource="http://creativecommons.org/ns#Notice" />
<cc:requires
rdf:resource="http://creativecommons.org/ns#Attribution" />
<cc:permits
rdf:resource="http://creativecommons.org/ns#DerivativeWorks" />
<cc:requires
rdf:resource="http://creativecommons.org/ns#ShareAlike" />
</cc:License>
</rdf:RDF>
</metadata>
<g
id="layer1"
inkscape:label="Icon"
inkscape:groupmode="layer"
style="display:inline"
transform="translate(0,-172)">
<g
inkscape:groupmode="layer"
id="layer2"
inkscape:label="baseplate"
style="display:none">
<text
xml:space="preserve"
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.33333349px;line-height:125%;font-family:Cantarell;-inkscape-font-specification:'Cantarell, Normal';text-align:start;writing-mode:lr-tb;text-anchor:start;display:inline;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.33264872;enable-background:new"
x="7.9499588"
y="148.65199"
id="context"
inkscape:label="context"><tspan
sodipodi:role="line"
id="tspan2716"
x="7.9499588"
y="148.65199"
style="font-size:5.33333349px;stroke-width:0.33264872">apps</tspan></text>
<text
inkscape:label="icon-name"
id="text3021"
y="157.23398"
x="7.7533054"
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.33333349px;line-height:125%;font-family:Cantarell;-inkscape-font-specification:'Cantarell, Bold';text-align:start;writing-mode:lr-tb;text-anchor:start;display:inline;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.33264872;enable-background:new"
xml:space="preserve"><tspan
y="157.23398"
x="7.7533054"
id="tspan3023"
sodipodi:role="line"
style="font-size:5.33333349px;stroke-width:0.33264872">org.gnome.</tspan></text>
<g
style="display:inline;fill:#000000;enable-background:new"
transform="matrix(7.9911709,0,0,8.0036407,-167.7909,-4846.0776)"
id="g12027"
inkscape:export-xdpi="12"
inkscape:export-ydpi="12" />
<rect
style="display:inline;overflow:visible;visibility:visible;fill:#f0f0f0;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.5;marker:none;enable-background:accumulate"
id="rect13805"
width="128"
height="128"
x="9.2651362e-08"
y="172"
inkscape:label="512x512" />
<g
id="g883"
style="fill:none;fill-opacity:0.25098039;stroke:#a579b3;stroke-opacity:1"
transform="translate(-24,24)" />
<g
id="g900"
style="fill:none;fill-opacity:0.25098039;stroke:#a579b3;stroke-opacity:1"
transform="translate(-24,24)" />
<g
id="g1168"
transform="matrix(0.25,0,0,0.25,6.9488522e-8,225)">
<circle
cx="256"
cy="44"
r="240"
id="path1142"
style="opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal" />
<rect
ry="32"
rx="32"
y="-180"
x="96"
height="448"
width="319.99979"
id="rect1110"
style="opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal" />
<rect
ry="32"
rx="32"
y="-164"
x="48"
height="416"
width="416"
id="rect1110-8"
style="display:inline;opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal;enable-background:new" />
<rect
ry="32"
rx="32"
y="-116"
x="32"
height="320"
width="448"
id="rect1110-8-9"
style="display:inline;opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal;enable-background:new" />
</g>
</g>
<g
inkscape:groupmode="layer"
id="layer9"
inkscape:label="hires"
style="display:none" />
<g
id="g944"
transform="matrix(1,0,0,0.93868822,0,14.545966)">
<path
style="fill:#99c1f1;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.41013032;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
d="m 50.53899,195.25817 6.396029,-11.43484 1.082405,-0.87215 4.821622,-10.46578 0.885604,-0.38763 2.558412,4.74837 2.755213,9.59364 1.672808,1.35667 3.542417,-0.87215 5.707227,12.59771 12.988859,9.59364 3.050415,3.87621 v 2.71335 l -16.334476,-1.25977 -7.084833,1.45359 -4.428021,-0.38763 -7.084833,0.29072 -11.414452,-0.58143 -3.640817,0.96905 -9.052843,-1.64739 -2.066409,0.0969 -1.476008,-0.48452 1.377607,-1.45358 1.869609,-1.06596 6.002428,-11.04722 1.279206,0.48453 5.412025,-6.49267 z"
id="path3455"
inkscape:connector-curvature="0" />
<path
style="fill:#241f31;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 49.358184,215.31759 -3.444016,0.9206 -9.003641,-1.74429 -1.918809,0.24226 -1.623608,-0.58143 1.574407,-1.50204 1.722008,-0.96905 5.953228,-11.09567 1.279205,0.53298 5.510426,-6.54112 0.344401,0.29072 -4.969223,10.27197 2.214011,1.93811 -0.246001,4.45765 z"
id="path3459"
inkscape:connector-curvature="0" />
<path
style="fill:#241f31;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 50.438601,195.22912 6.470906,-11.5803 1.113274,-0.6167 4.870575,-10.62099 0.904535,-0.41113 -0.417479,3.3576 0.626218,0.89079 0.834954,15.89722 1.391594,3.70021 -3.687722,5.34476 0.208739,1.37044 -0.347898,5.68737 1.87865,3.28908 7.375442,2.19272 1.252433,2.19272 -0.487057,0.13704 -4.244358,-0.54818 -6.540486,0.41114 -2.435287,-2.19272 -0.626216,-4.24839 -2.087389,-6.16703 -4.035619,-3.42612 -2.087388,-4.38544"
id="path3461"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 32.069579,218.11563 c -0.06958,-0.27409 0.695796,-1.23341 0.695796,-1.23341 l 2.783185,-0.0685 1.739491,2.26124 4.661836,5.13919 0.139158,1.57602 -4.174778,5.96145 -0.487057,6.16703 -2.922344,2.26124 -0.06958,1.57601 h -1.113274 l -1.322013,-3.08351 2.017809,-14.86938 z"
id="path3400"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 48.83827,222.43255 1.600331,-3.01499 -0.695796,-0.75375 -5.635951,-1.16488 -3.200663,0.82227 -0.06958,1.50749 1.53075,0.75375 1.461174,2.67237 -0.208739,1.71307 1.739489,1.02783 2.296129,-0.54818 z"
id="path3402"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 51.203977,217.70449 1.113274,-0.68522 2.365707,1.02784 1.322013,2.67237 -2.226548,2.26125 -1.322013,-0.82227 -1.322013,-0.61671 0.834956,-1.71306 z"
id="path3404"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 43.758957,226.61242 1.948228,0.68522 0.417479,1.91863 -0.626216,1.30193 -1.182854,0.34261 -1.113275,1.02784 -0.765376,3.63169 0.626218,3.01499 -1.252435,0.68522 -0.487057,-0.41113 -0.278319,-1.5075 -1.80907,-1.37045 -0.765376,-3.49464 3.618141,-3.42613 1.669912,-2.67237"
id="path3406"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 50.57776,223.25481 0.13916,0.68523 -2.783187,3.83726 0.06958,1.64454 -0.626218,1.50749 -1.60033,1.43897 -0.06958,0.75375 1.600333,1.91863 1.182854,3.08351 0.974114,0.68523 1.669911,-2.80942 -0.278318,-3.22056 3.966039,-3.3576 0.695796,-1.09636 -3.270243,-4.45396 z"
id="path3408"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 51.839954,236.39856 -0.834826,1.58948 0.166966,1.26061 1.057445,1.97315 0.500896,-0.32886 0.389584,-1.7539 1.447031,-1.151 2.337512,-4.0559 -0.22262,-1.04138 -1.947927,-1.69909 -2.114892,1.31542 0.278276,3.39819 z"
id="path3410"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 57.593778,229.84236 -1.043694,1.09636 0.765375,0.89079 1.043695,-0.20556 v -1.43898 z"
id="path3412"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 59.344793,218.25267 -0.765376,2.19272 -0.695796,0.27409 -0.695796,1.91863 -2.226548,2.26124 2.574446,3.56317 h 1.182854 l 0.487057,0.75375 0.626217,1.09636 1.948229,1.30193 2.922346,-0.6167 1.53075,-2.26125 -1.043694,-3.3576 -1.043693,-1.64454 1.322011,-2.60385 -0.904535,-1.37045 -2.226548,0.0685 z"
id="path3416"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 72.150522,238.17554 -0.518261,1.78635 1.036524,2.16915 1.684349,-2.04155 -0.647826,-2.16915 z"
id="path3418"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 66.789813,223.66595 1.600333,-0.75375 1.739489,-4.11135 2.922346,0.75375 1.322013,0.41114 0.139159,6.7152 -1.461172,1.02784 -2.226548,4.17987 -0.834956,-0.41114 -0.626216,0.95932 -2.574448,-0.61671 0.904537,-3.08351 z"
id="path3422"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 77.505077,218.59529 1.182854,-0.20557 2.435287,1.30193 -0.974115,1.02783 -2.087389,3.63169 -1.391593,0.0685 -1.113274,-0.61671 1.043695,-2.19271 z"
id="path3426"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 73.539038,231.06638 1.043695,-1.30193 1.043694,-2.80942 4.522676,1.71306 -0.974115,2.87795 -1.94823,-0.41114 -1.80907,1.09636 z"
id="path3428"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 78.200873,225.6531 7.932079,-7.94861 3.339822,1.09636 0.974115,0.13705 1.600331,-1.02784 3.339822,0.0685 -5.079314,12.81371 -3.200663,-1.98715 0.139161,-1.16489 -0.695798,-0.6167 -0.208737,-1.16488 -1.043696,0.27409 -3.200663,2.39829 z"
id="path3430"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 81.401536,230.99786 c 0,-0.2741 2.156968,-1.98716 2.156968,-1.98716 l 2.017811,1.30193 -0.904535,2.32976 -1.182855,0.75375 z"
id="path3432"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 81.679855,237.8501 0.765375,-1.91863 0.208739,-1.2334 2.156969,0.20557 2.156968,-2.87795 3.409403,1.02784 -0.904535,2.80942 -0.904535,0.34261 -0.626218,2.80943 1.043694,4.72805 -0.904535,1.09636 -1.80907,-2.19272 -0.626217,-1.37045 z"
id="path3434"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 78.131294,238.60385 0.626216,3.08351 -0.626216,3.22056 0.765375,0.95931 -0.626216,5.68737 2.504866,2.32976 1.87865,-0.47965 0.417478,-3.35761 1.669911,-0.0685 3.757301,-1.8501 -0.20874,-1.98716 -2.226548,-0.20556 -1.182854,-3.01499 -3.200662,-2.05568 -1.252434,-2.39828 z"
id="path3436"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 84.532619,251.41755 -0.278318,1.43898 -0.695797,0.6167 1.322013,2.67238 2.365709,-0.20557 1.53075,-2.94647 -2.365707,-1.98715 z"
id="path3438"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 64.993183,249.51154 -1.14959,2.51583 0.766392,1.69818 2.618509,0.25159 0.702526,1.19502 1.021857,2.39003 -0.574794,2.32714 3.89583,1.88688 0.95799,-1.06923 0.510928,-4.59139 -4.023561,-2.70451 -0.127732,-4.21402 z"
id="path3440"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 72.912822,251.00642 h 1.391592 l 2.574446,0.75375 1.391593,1.98715 1.461172,1.30193 -0.139159,3.42612 -3.409402,1.57602 -0.974115,-1.85011 0.626217,-3.3576 -3.270243,-1.85011 z"
id="path3442"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 72.147446,264.77944 1.80907,-1.98715 3.339822,-1.85011 1.322013,-0.0685 4.661835,-3.63169 1.391594,0.34261 0.556637,4.52248 -3.200664,4.04283 -2.852765,-0.82227 -1.80907,0.54818 -0.765376,1.43897 -2.087389,0.68522 z"
id="path3444"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 75.06979,272.93361 0.765376,-1.30192 1.252433,-0.41114 0.904535,-2.87794 1.94823,-0.61671 0.556637,2.60386 -3.339822,6.0985 -1.391593,-0.0685 z"
id="path3446"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 71.451649,268.20556 -1.252433,1.85011 2.504867,1.98715 0.765376,0.82227 1.73949,-2.39829 -2.296127,-2.80942 -1.461173,0.27409 z"
id="path3448"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 62.24531,254.0948 1.461172,1.02784 1.948229,0.54818 0.487058,1.64454 -1.461173,2.67237 -0.06958,1.78159 -1.669911,1.85011 -1.252433,-2.05568 0.487057,-2.80942 -1.391593,-0.34261 -0.904535,-2.80942 z"
id="path3450"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 47.585836,246.55246 -0.695796,3.70021 -0.139159,1.37045 1.87865,0.68523 1.391592,0.95931 1.809071,-1.64454 -0.417478,-0.95931 z"
id="path3452"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 54.682958,247.78586 -1.043694,1.02784 0.208739,1.98715 1.600331,0.89079 0.626217,-0.47965 0.06958,-2.26125 z"
id="path3454"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 48.629531,258.95503 4.800994,-6.16703 3.409402,0.82227 0.556637,1.78159 3.131083,4.79657 -1.669911,5.82441 -3.200663,-1.37045 -0.417478,-3.49464 -2.087388,1.30192 z"
id="path3456"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 45.915924,252.71948 -0.487056,1.98715 1.60033,1.57602 1.461174,-0.20557 -0.347899,-2.19272 z"
id="path3458"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 67.555189,261.6274 -1.80907,2.80943 -2.435287,8.42826 2.783185,3.76874 1.461172,-0.0685 1.113274,-2.12419 1.043696,-0.20557 0.487057,-1.09636 -1.043694,-4.45396 1.182853,-4.31692 z"
id="path3460"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 58.718577,267.79443 1.600331,-1.23341 2.017809,1.71306 -0.904535,1.85011 z"
id="path3462"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 58.509838,276.49678 2.156968,-4.591 1.391593,-0.27409 0.834955,1.50749 -2.017809,5.13919 z"
id="path3464"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 71.242911,274.02997 1.391592,0.20557 1.043694,3.01499 2.01781,0.68522 1.530751,1.57602 -0.904535,2.87795 -2.365707,2.32976 -0.139159,3.56317 -1.322013,1.98715 -2.504867,-1.85011 -0.278318,-2.67237 -1.530752,-1.78159 -1.113274,-3.08351 3.61814,-4.17987 z"
id="path3466"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 62.893354,276.5653 3.270244,1.16489 0.06958,3.70021 -0.556637,0.68523 0.974115,3.70021 1.252433,1.64454 0.06958,3.08351 -2.017809,1.37045 -2.574447,8.08566 -2.574447,-1.30193 -1.948229,-9.79872 z"
id="path3468"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 58.440258,283.5546 h 0.556637 l 0.417478,0.95931 -0.208739,1.30193 -1.461172,0.13704 z"
id="path3472"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 56.700767,279.16916 -1.113274,0.95931 0.834956,2.80943 1.600331,0.20556 0.487058,-2.05567 -0.695796,-1.91863 z"
id="path3474"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 53.152207,272.17987 0.139159,5.13918 1.87865,1.23341 0.834955,-0.54818 0.904535,-3.63169 1.530752,-1.57602 -1.669911,-3.97431 -3.548561,3.08352 z"
id="path3476"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 45.915924,258.33832 -0.208739,3.83726 -4.731414,3.97431 1.948229,2.80942 8.488716,0.82227 0.417478,1.98715 1.043694,-0.75375 0.487057,-2.19272 1.182854,-1.64454 -0.417478,-1.09635 -1.87865,-2.60386 -3.757299,-1.37045 -1.461174,-3.22056 z"
id="path3480"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 40.279975,263.68308 1.669912,0.6167 3.061502,-6.37259 -0.904535,-5.61884 -2.504867,-0.34262 -1.391592,-1.2334 2.156968,-7.606 -2.087388,-4.45396 -3.409402,1.57602 -0.834956,3.42612 -1.87865,0.20557 -0.347898,2.1242 1.530752,1.64454 h 1.322013 l 0.626217,3.90578 2.296127,5.61884 -0.347898,2.19272 z"
id="path3482"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 66.531337,247.61066 -0.590018,-0.31657 -0.420783,-1.71262 0.427793,-0.66945 1.306823,-1.13114 2.316342,-1.38746 1.06612,0.23465 -0.01701,2.21105 -2.36166,3.35302 z"
id="path4284"
inkscape:connector-curvature="0"
inkscape:transform-center-x="4.9927099"
inkscape:transform-center-y="-9.3161687" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 72.373733,232.22199 -0.815102,1.03206 4.017286,4.12827 1.571981,0.17201 1.339096,-0.86006 0.931544,0.63071 2.387083,-2.98152 -2.794634,-0.91739 -3.027519,0.22934 z"
id="path3601"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 57.407878,237.1102 -1.301737,2.34289 -1.301738,0.61888 -0.17955,1.45878 -4.488748,1.54719 -0.403989,1.50299 0.314213,0.30944 1.032412,0.0884 v 1.41457 l 1.660839,1.50299 2.154598,-1.94504 1.571064,0.35364 2.738136,-1.94504 -1.436399,-2.56392 0.987525,-3.44803 -0.583538,-1.37037 z"
id="path3603"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 62.104217,246.96003 5.843936,-6.55723 0.659867,-2.66044 2.221783,-0.40757 -0.386451,-3.39556 -2.000988,-0.60704 -6.246127,-0.36572 -2.624948,2.5137 1.519708,2.75102 -0.347742,5.51876 z"
id="path3605"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 71.024647,249.63275 5.822153,1.31875 1.047988,-3.89891 -1.280874,-1.43343 0.523995,-6.02038 -3.551515,5.275 0.34933,2.06413 -2.037753,0.80272 -1.164431,0.45869 z"
id="path3607"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 59.099222,247.24427 -2.095974,1.72011 -0.05822,1.60543 0.465772,1.72011 1.455539,0.97473 -0.407551,0.97473 2.328861,-0.34402 2.27064,-2.86685 -1.571981,-0.57337 -0.640437,-2.86685 -1.51376,-0.40136 z"
id="path3609"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 44.078067,234.34346 0.291107,4.47228 -1.863089,1.43342 2.095976,3.72691 2.037753,0.0573 2.27064,-3.55489 -2.969297,-4.98831 z"
id="path3611"
inkscape:connector-curvature="0" />
<path
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
d="m 44.660282,245.46683 -3.318627,4.30027 1.339096,1.26141 2.561747,-0.28668 1.222652,-3.15354 z"
id="path3613"
inkscape:connector-curvature="0" />
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 34 KiB

Binary file not shown.

54
tests/fuzz.py Normal file
View File

@@ -0,0 +1,54 @@
import mimetypes
import os
import sys
sys.path.append('..')
import atheris
with atheris.instrument_imports(enable_loader_override=False):
from libmat2 import parser_factory, UNSUPPORTED_EXTENSIONS
# Substrings of mimetypes that are kept out of the fuzzing set (the reason
# is not recorded here). 'aif' also matches every 'aifc' mimetype, so a
# separate 'aifc' entry would never be reached.
_SKIPPED_MIMETYPE_MARKERS = ('aif', 'wav', 'gif')

# Collect every file extension that a registered parser claims to handle,
# minus video formats, the markers above, and extensions that libmat2
# itself flags as unsupported.
extensions = set()
for parser in parser_factory._get_parsers():  # type: ignore
    for mtype in parser.mimetypes:
        if mtype.startswith('video'):
            continue
        if any(marker in mtype for marker in _SKIPPED_MIMETYPE_MARKERS):
            continue
        for extension in mimetypes.guess_all_extensions(mtype):
            if extension not in UNSUPPORTED_EXTENSIONS:
                extensions.add(extension)

# Iteration order of a set of strings changes from one interpreter run to
# the next (string hash randomization), which would make the same fuzz
# input select a different extension on replay. Sorting gives a stable
# index -> extension mapping so crashes can be reproduced.
extensions = sorted(extensions)
def TestOneInput(data):
    """Run one fuzzing iteration against libmat2.

    The fuzzer-provided bytes are split in two: the first part picks one of
    the module-level ``extensions``, the remainder becomes the content of a
    file with that extension. The file is then handed to the matching
    libmat2 parser, which reads its metadata, cleans it, and is
    re-instantiated on the same path to read the metadata again.

    ``ValueError`` is treated as an expected rejection of malformed input;
    any other exception propagates to the fuzzer.

    :param data: raw bytes supplied by atheris for this iteration.
    """
    fdp = atheris.FuzzedDataProvider(data)
    extension = fdp.PickValueInList(extensions)
    # Everything left after choosing the extension is the file body.
    payload = fdp.ConsumeBytes(sys.maxsize)

    fname = '/tmp/mat2_fuzz' + extension
    with open(fname, 'wb') as f:
        f.write(payload)

    try:
        p, _ = parser_factory.get_parser(fname)
        if p:
            # Run the parser in-process rather than through the sandbox.
            p.sandbox = False
            p.get_meta()
            p.remove_all()
            p, _ = parser_factory.get_parser(fname)
            p.get_meta()
    except ValueError:
        pass
    finally:
        # Remove the input file even when a parser raises something other
        # than ValueError, so a crashing iteration does not leave it behind.
        os.remove(fname)
# Pass the process arguments and the per-input callback to atheris, then
# start the fuzzing run.
atheris.Setup(sys.argv, TestOneInput)
atheris.Fuzz()

View File

@@ -1,100 +1,203 @@
import random
import os
import shutil
import stat
import subprocess
import unittest
import glob
from libmat2 import images, parser_factory
mat2_binary = ['./mat2']
if 'MAT2_GLOBAL_PATH_TESTSUITE' in os.environ:
# Debian runs tests after installing the package
# https://0xacab.org/jvoisin/mat2/issues/16#note_153878
mat2_binary = ['/usr/bin/env', 'mat2']
class TestHelp(unittest.TestCase):
def test_help(self):
proc = subprocess.Popen(['./mat2', '--help'], stdout=subprocess.PIPE)
proc = subprocess.Popen(mat2_binary + ['--help'], stdout=subprocess.PIPE)
stdout, _ = proc.communicate()
self.assertIn(b'usage: mat2 [-h] [-v] [-l] [-c | -s | -L] [files [files ...]]', stdout)
self.assertIn(b'mat2 [-h] [-V]', stdout)
self.assertIn(b'[--unknown-members policy]', stdout)
self.assertIn(b'[--inplace]', stdout)
self.assertIn(b'[--no-sandbox]', stdout)
self.assertIn(b' [-v] [-l]', stdout)
self.assertIn(b'[--check-dependencies]', stdout)
self.assertIn(b'[-L | -s]', stdout)
self.assertIn(b'[files ...]', stdout)
def test_no_arg(self):
proc = subprocess.Popen(['./mat2'], stdout=subprocess.PIPE)
proc = subprocess.Popen(mat2_binary, stdout=subprocess.PIPE)
stdout, _ = proc.communicate()
self.assertIn(b'usage: mat2 [-h] [-v] [-l] [-c | -s | -L] [files [files ...]]', stdout)
self.assertIn(b'mat2 [-h] [-V]', stdout)
self.assertIn(b'[--unknown-members policy]', stdout)
self.assertIn(b'[--inplace]', stdout)
self.assertIn(b'[--no-sandbox]', stdout)
self.assertIn(b' [-v] [-l] [--check-dependencies] [-L | -s]', stdout)
self.assertIn(b'[files ...]', stdout)
class TestVersion(unittest.TestCase):
def test_version(self):
proc = subprocess.Popen(['./mat2', '--version'], stdout=subprocess.PIPE)
proc = subprocess.Popen(mat2_binary + ['--version'], stdout=subprocess.PIPE)
stdout, _ = proc.communicate()
self.assertTrue(stdout.startswith(b'MAT2 '))
self.assertTrue(stdout.startswith(b'mat2 '))
class TestExclusiveArgs(unittest.TestCase):
def test_version(self):
proc = subprocess.Popen(['./mat2', '-s', '-c'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout, stderr = proc.communicate()
self.assertIn(b'mat2: error: argument -c/--check: not allowed with argument -s/--show', stderr)
class TestDependencies(unittest.TestCase):
    """Check the output of ``mat2 --check-dependencies``."""

    def test_dependencies(self):
        """The dependency report written to stdout mentions ``mat2``."""
        proc = subprocess.Popen(mat2_binary + ['--check-dependencies'], stdout=subprocess.PIPE)
        stdout, _ = proc.communicate()
        # assertIn shows the captured output when the check fails, which
        # assertTrue(... in ...) does not.
        self.assertIn(b'mat2', stdout)
class TestReturnValue(unittest.TestCase):
def test_nonzero(self):
ret = subprocess.call(['./mat2', './mat2'], stdout=subprocess.DEVNULL)
ret = subprocess.call(mat2_binary + ['mat2'], stdout=subprocess.DEVNULL)
self.assertEqual(255, ret)
ret = subprocess.call(['./mat2', '--whololo'], stderr=subprocess.DEVNULL)
ret = subprocess.call(mat2_binary + ['--whololo'], stderr=subprocess.DEVNULL)
self.assertEqual(2, ret)
def test_zero(self):
ret = subprocess.call(['./mat2'], stdout=subprocess.DEVNULL)
ret = subprocess.call(mat2_binary, stdout=subprocess.DEVNULL)
self.assertEqual(0, ret)
ret = subprocess.call(['./mat2', '--show', './mat2'], stdout=subprocess.DEVNULL)
ret = subprocess.call(mat2_binary + ['--show', 'mat2'], stdout=subprocess.DEVNULL)
self.assertEqual(0, ret)
class TestCleanFolder(unittest.TestCase):
    """Run mat2 with a directory as its argument."""

    def test_jpg(self):
        """Show, clean, then show again a folder holding two dirty jpg files."""
        folder = './tests/data/folder/'
        # exist_ok tolerates a directory left behind by an earlier run.
        os.makedirs(folder, exist_ok=True)
        # Registered right away so the scratch directory is removed even
        # when one of the assertions below fails.
        self.addCleanup(shutil.rmtree, folder, ignore_errors=True)

        shutil.copy('./tests/data/dirty.jpg', './tests/data/folder/clean1.jpg')
        shutil.copy('./tests/data/dirty.jpg', './tests/data/folder/clean2.jpg')

        proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/folder/'],
                                stdout=subprocess.PIPE)
        stdout, _ = proc.communicate()
        self.assertIn(b'Comment: Created with GIMP', stdout)

        proc = subprocess.Popen(mat2_binary + ['./tests/data/folder/'],
                                stdout=subprocess.PIPE)
        proc.communicate()

        # Drop the dirty inputs so that the second --show only looks at
        # whatever the cleaning run left in the folder.
        os.remove('./tests/data/folder/clean1.jpg')
        os.remove('./tests/data/folder/clean2.jpg')

        proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/folder/'],
                                stdout=subprocess.PIPE)
        stdout, _ = proc.communicate()
        self.assertNotIn(b'Comment: Created with GIMP', stdout)
        self.assertIn(b'No metadata found', stdout)
class TestCleanMeta(unittest.TestCase):
def test_jpg(self):
shutil.copy('./tests/data/dirty.jpg', './tests/data/clean.jpg')
proc = subprocess.Popen(['./mat2', '--show', './tests/data/clean.jpg'],
proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/clean.jpg'],
stdout=subprocess.PIPE)
stdout, _ = proc.communicate()
self.assertIn(b'Comment: Created with GIMP', stdout)
proc = subprocess.Popen(['./mat2', './tests/data/clean.jpg'],
proc = subprocess.Popen(mat2_binary + ['./tests/data/clean.jpg'],
stdout=subprocess.PIPE)
stdout, _ = proc.communicate()
proc = subprocess.Popen(['./mat2', '--show', './tests/data/clean.cleaned.jpg'],
proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/clean.cleaned.jpg'],
stdout=subprocess.PIPE)
stdout, _ = proc.communicate()
self.assertNotIn(b'Comment: Created with GIMP', stdout)
os.remove('./tests/data/clean.jpg')
def test_jpg_nosandbox(self):
shutil.copy('./tests/data/dirty.jpg', './tests/data/clean.jpg')
proc = subprocess.Popen(mat2_binary + ['--show', '--no-sandbox', './tests/data/clean.jpg'],
stdout=subprocess.PIPE)
stdout, _ = proc.communicate()
self.assertIn(b'Comment: Created with GIMP', stdout)
proc = subprocess.Popen(mat2_binary + ['./tests/data/clean.jpg'],
stdout=subprocess.PIPE)
stdout, _ = proc.communicate()
proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/clean.cleaned.jpg'],
stdout=subprocess.PIPE)
stdout, _ = proc.communicate()
self.assertNotIn(b'Comment: Created with GIMP', stdout)
os.remove('./tests/data/clean.jpg')
os.remove('./tests/data/clean.cleaned.jpg')
class TestCopyPermissions(unittest.TestCase):
    """The cleaned file carries the same mode bits as its source."""

    def test_jpg_777(self):
        """Clean a 0o777 jpg and compare the mode of the cleaned copy."""
        source = './tests/data/clean.jpg'
        cleaned = './tests/data/clean.cleaned.jpg'

        shutil.copy('./tests/data/dirty.jpg', source)
        os.chmod(source, 0o777)

        shown = subprocess.Popen(mat2_binary + ['--show', source],
                                 stdout=subprocess.PIPE).communicate()[0]
        self.assertIn(b'Comment: Created with GIMP', shown)

        subprocess.Popen(mat2_binary + [source],
                         stdout=subprocess.PIPE).communicate()

        shown = subprocess.Popen(mat2_binary + ['--show', cleaned],
                                 stdout=subprocess.PIPE).communicate()[0]
        self.assertNotIn(b'Comment: Created with GIMP', shown)

        # 0o100777: regular-file type bits plus rwx for everyone.
        mode = os.stat(cleaned)[stat.ST_MODE]
        self.assertEqual(mode, 0o100777)

        for leftover in (source, cleaned):
            os.remove(leftover)
class TestIsSupported(unittest.TestCase):
def test_pdf(self):
proc = subprocess.Popen(['./mat2', '--show', './tests/data/dirty.pdf'],
proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/dirty.pdf'],
stdout=subprocess.PIPE)
stdout, _ = proc.communicate()
self.assertNotIn(b"isn't supported", stdout)
class TestGetMeta(unittest.TestCase):
maxDiff = None
def test_pdf(self):
proc = subprocess.Popen(['./mat2', '--show', './tests/data/dirty.pdf'],
proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/dirty.pdf'],
stdout=subprocess.PIPE)
stdout, _ = proc.communicate()
self.assertIn(b'producer: pdfTeX-1.40.14', stdout)
self.assertIn(b'Producer: pdfTeX-1.40.14', stdout)
def test_png(self):
proc = subprocess.Popen(['./mat2', '--show', './tests/data/dirty.png'],
proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/dirty.png'],
stdout=subprocess.PIPE)
stdout, _ = proc.communicate()
self.assertIn(b'Comment: This is a comment, be careful!', stdout)
def test_jpg(self):
proc = subprocess.Popen(['./mat2', '--show', './tests/data/dirty.jpg'],
proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/dirty.jpg'],
stdout=subprocess.PIPE)
stdout, _ = proc.communicate()
self.assertIn(b'Comment: Created with GIMP', stdout)
def test_docx(self):
proc = subprocess.Popen(['./mat2', '--show', './tests/data/dirty.docx'],
proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/dirty.docx'],
stdout=subprocess.PIPE)
stdout, _ = proc.communicate()
self.assertIn(b'Application: LibreOffice/5.4.5.1$Linux_X86_64', stdout)
@@ -102,7 +205,7 @@ class TestGetMeta(unittest.TestCase):
self.assertIn(b'revision: 1', stdout)
def test_odt(self):
proc = subprocess.Popen(['./mat2', '--show', './tests/data/dirty.odt'],
proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/dirty.odt'],
stdout=subprocess.PIPE)
stdout, _ = proc.communicate()
self.assertIn(b'generator: LibreOffice/3.3$Unix', stdout)
@@ -110,25 +213,126 @@ class TestGetMeta(unittest.TestCase):
self.assertIn(b'date_time: 2011-07-26 02:40:16', stdout)
def test_mp3(self):
proc = subprocess.Popen(['./mat2', '--show', './tests/data/dirty.mp3'],
proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/dirty.mp3'],
stdout=subprocess.PIPE)
stdout, _ = proc.communicate()
self.assertIn(b'TALB: harmfull', stdout)
self.assertIn(b'COMM::: Thank you for using MAT !', stdout)
def test_flac(self):
proc = subprocess.Popen(['./mat2', '--show', './tests/data/dirty.flac'],
stdout=subprocess.PIPE)
proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/dirty.flac'],
stdout=subprocess.PIPE, bufsize=0)
stdout, _ = proc.communicate()
self.assertIn(b'comments: Thank you for using MAT !', stdout)
self.assertIn(b'genre: Python', stdout)
self.assertIn(b'title: I am so', stdout)
def test_ogg(self):
proc = subprocess.Popen(['./mat2', '--show', './tests/data/dirty.ogg'],
proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/dirty.ogg'],
stdout=subprocess.PIPE)
stdout, _ = proc.communicate()
self.assertIn(b'comments: Thank you for using MAT !', stdout)
self.assertIn(b'genre: Python', stdout)
self.assertIn(b'i am a : various comment', stdout)
self.assertIn(b'artist: jvoisin', stdout)
#def test_webp(self):
# proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/dirty.webp'],
# stdout=subprocess.PIPE)
# stdout, _ = proc.communicate()
# self.assertIn(b'Warning: [minor] Improper EXIF header', stdout)
class TestControlCharInjection(unittest.TestCase):
    """Check how ``--show`` prints the comment of ``control_chars.jpg``."""

    def test_jpg(self):
        """The comment line must read exactly ``Comment: GQ`` plus newline.

        NOTE(review): presumably the fixture's comment embeds control
        characters that mat2 strips before printing -- inferred from the
        fixture name, confirm against the data file.
        """
        command = mat2_binary + ['--show', './tests/data/control_chars.jpg']
        output, _ = subprocess.Popen(command, stdout=subprocess.PIPE).communicate()
        self.assertIn(b'Comment: GQ\n', output)
class TestCommandLineParallel(unittest.TestCase):
    """Hand many files to a single mat2 invocation."""

    # Number of copies handed to mat2 in one go.
    iterations = 24

    def test_same(self):
        """Clean ``iterations`` copies of the same jpg in one invocation."""
        sources = ['./tests/data/dirty_%d.jpg' % i for i in range(self.iterations)]
        for path in sources:
            shutil.copy('./tests/data/dirty.jpg', path)

        proc = subprocess.Popen(mat2_binary + sources, stdout=subprocess.PIPE)
        proc.communicate()

        for i, path in enumerate(sources):
            cleaned = './tests/data/dirty_%d.cleaned.jpg' % i
            p = images.JPGParser(cleaned)
            self.assertEqual(p.get_meta(), {})
            os.remove(cleaned)
            os.remove(path)

    def test_different(self):
        """Clean every ``dirty.*`` sample of a copy of the data directory."""
        src = './tests/data/'
        dst = './tests/data/parallel'
        shutil.copytree(src, dst)
        # Drop the copy even when an assertion below fails.
        self.addCleanup(shutil.rmtree, dst, ignore_errors=True)

        proc = subprocess.Popen(mat2_binary + glob.glob('./tests/data/parallel/dirty.*'),
                                stdout=subprocess.PIPE)
        proc.communicate()

        # The pattern used to start with './test/' (missing "s"): it matched
        # nothing, so this loop never ran.
        for cleaned in glob.glob('./tests/data/parallel/dirty.cleaned.*'):
            # get_parser returns a (parser, mimetype) pair.
            p, mime = parser_factory.get_parser(cleaned)
            self.assertIsNotNone(mime)
            self.assertIsNotNone(p)
            # NOTE(review): the original (dead) code compared get_meta() to
            # {} here. Some formats list non-empty `expected_meta` elsewhere
            # in the testsuite, so only successful parsing is checked;
            # tighten per format once confirmed.
            self.assertIsNotNone(p.get_meta())

    def test_faulty(self):
        """Mix valid jpg files with torrents disguised as docx files."""
        jpgs = ['./tests/data/dirty_%d.jpg' % i for i in range(self.iterations)]
        fake_docx = ['./tests/data/dirty_%d.docx' % i for i in range(self.iterations)]
        for jpg, docx in zip(jpgs, fake_docx):
            shutil.copy('./tests/data/dirty.jpg', jpg)
            shutil.copy('./tests/data/dirty.torrent', docx)

        to_process = jpgs + fake_docx
        # Shuffled so the bogus files are spread among the valid ones.
        random.shuffle(to_process)
        proc = subprocess.Popen(mat2_binary + to_process,
                                stdout=subprocess.PIPE)
        proc.communicate()

        for i, (jpg, docx) in enumerate(zip(jpgs, fake_docx)):
            cleaned = './tests/data/dirty_%d.cleaned.jpg' % i
            p = images.JPGParser(cleaned)
            self.assertEqual(p.get_meta(), {})
            os.remove(cleaned)
            os.remove(jpg)
            os.remove(docx)
class TestInplaceCleaning(unittest.TestCase):
    """Exercise the ``--inplace`` command-line switch."""

    def test_cleaning(self):
        """After ``--inplace``, ``--show`` on the same path finds nothing."""
        shutil.copy('./tests/data/dirty.jpg', './tests/data/clean.jpg')
        proc = subprocess.Popen(mat2_binary + ['--inplace', './tests/data/clean.jpg'],
                                stdout=subprocess.PIPE)
        proc.communicate()
        proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/clean.jpg'],
                                stdout=subprocess.PIPE)
        stdout, _ = proc.communicate()
        self.assertIn(b' No metadata found in ./tests/data/clean.jpg.\n', stdout)
        os.remove('./tests/data/clean.jpg')

    def test_cleaning_multiple_one_fails(self):
        """One bogus file must not prevent the valid ones from being cleaned."""
        files = ['./tests/data/clean_%d.jpg' % i for i in range(9)]
        for f in files:
            shutil.copy('./tests/data/dirty.jpg', f)
        # A torrent wearing a .jpg extension: the file expected to fail.
        faulty = './tests/data/clean_9.jpg'
        shutil.copy('./tests/data/dirty.torrent', faulty)

        # The faulty file used to be created but never handed to mat2, so
        # the failure path was not exercised at all.
        proc = subprocess.Popen(mat2_binary + ['--inplace'] + files + [faulty],
                                stdout=subprocess.PIPE)
        proc.communicate()

        for f in files:
            p = images.JPGParser(f)
            meta = p.get_meta()
            self.assertEqual(meta, {})

        for i in range(10):
            os.remove('./tests/data/clean_%d.jpg' % i)

View File

@@ -1,11 +1,57 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import unittest
import stat
import time
import shutil
import os
import logging
import zipfile
import tarfile
from libmat2 import pdf, images, audio, office, parser_factory, torrent, harmless
from libmat2 import pdf, images, audio, office, parser_factory, torrent
from libmat2 import harmless, video, web, archive
# No need to log messages: should something go wrong,
# the testsuite _will_ fail.
logger = logging.getLogger(__name__)
logger.setLevel(logging.FATAL)
class TestInexistentFiles(unittest.TestCase):
    """``get_parser`` yields ``(None, None)`` for paths it cannot handle."""

    def _assert_no_parser(self, path):
        """Assert that neither a parser nor a mimetype is returned for `path`."""
        parser, mimetype = parser_factory.get_parser(path)
        self.assertIsNone(mimetype)
        self.assertIsNone(parser)

    def test_ro(self):
        self._assert_no_parser('/etc/passwd')

    def test_notaccessible(self):
        self._assert_no_parser('/etc/shadow')

    def test_folder(self):
        self._assert_no_parser('./tests/')

    def test_inexistingfile(self):
        self._assert_no_parser('./tests/NONEXISTING_FILE')

    def test_chardevice(self):
        self._assert_no_parser('/dev/zero')

    def test_brokensymlink(self):
        shutil.copy('./tests/test_libmat2.py', './tests/clean.py')
        os.symlink('./tests/clean.py', './tests/SYMLINK')
        # Registered before the assertions: a leftover link would make
        # os.symlink raise FileExistsError on the next run.
        self.addCleanup(os.unlink, './tests/SYMLINK')
        os.remove('./tests/clean.py')
        self._assert_no_parser('./tests/SYMLINK')
class TestUnsupportedFiles(unittest.TestCase):
def test_pdf(self):
@@ -15,6 +61,23 @@ class TestUnsupportedFiles(unittest.TestCase):
self.assertEqual(parser, None)
os.remove('./tests/clean.py')
class TestCorruptedEmbedded(unittest.TestCase):
    """Office documents that embed a corrupted file."""

    def test_docx(self):
        """Both cleaning and reading metadata raise ``ValueError``."""
        target = './tests/data/clean.docx'
        shutil.copy('./tests/data/embedded_corrupted.docx', target)
        parser = parser_factory.get_parser(target)[0]
        self.assertRaises(ValueError, parser.remove_all)
        with self.assertRaises(ValueError):
            self.assertIsNotNone(parser.get_meta())
        os.remove(target)

    def test_odt(self):
        """Cleaning reports failure while metadata is still returned."""
        target = './tests/data/clean.odt'
        shutil.copy('./tests/data/embedded_corrupted.odt', target)
        parser = parser_factory.get_parser(target)[0]
        self.assertFalse(parser.remove_all())
        self.assertTrue(parser.get_meta())
        os.remove(target)
class TestExplicitelyUnsupportedFiles(unittest.TestCase):
def test_pdf(self):
@@ -25,6 +88,25 @@ class TestExplicitelyUnsupportedFiles(unittest.TestCase):
os.remove('./tests/data/clean.py')
class TestWrongContentTypesFileOffice(unittest.TestCase):
    """``MSOfficeParser`` raises ``ValueError`` on these docx samples."""

    def _assert_rejected(self, sample):
        """Copy `sample` to clean.docx, expect ValueError, then remove it."""
        scratch = './tests/data/clean.docx'
        shutil.copy(sample, scratch)
        self.assertRaises(ValueError, office.MSOfficeParser, scratch)
        os.remove(scratch)

    def test_office_incomplete(self):
        self._assert_rejected('./tests/data/malformed_content_types.docx')

    def test_office_broken(self):
        self._assert_rejected('./tests/data/broken_xml_content_types.docx')

    def test_office_absent(self):
        self._assert_rejected('./tests/data/no_content_types.docx')
class TestCorruptedFiles(unittest.TestCase):
def test_pdf(self):
shutil.copy('./tests/data/dirty.png', './tests/data/clean.png')
@@ -40,21 +122,40 @@ class TestCorruptedFiles(unittest.TestCase):
def test_png2(self):
shutil.copy('./tests/test_libmat2.py', './tests/clean.png')
parser, mimetype = parser_factory.get_parser('./tests/clean.png')
self.assertIsNone(parser)
with self.assertRaises(ValueError):
parser_factory.get_parser('./tests/clean.png')
os.remove('./tests/clean.png')
def test_torrent(self):
shutil.copy('./tests/data/dirty.png', './tests/data/clean.torrent')
p = torrent.TorrentParser('./tests/data/clean.torrent')
self.assertFalse(p.remove_all())
expected = {'Unknown meta': 'Unable to parse torrent file "./tests/data/clean.torrent".'}
self.assertEqual(p.get_meta(), expected)
with self.assertRaises(ValueError):
torrent.TorrentParser('./tests/data/clean.torrent')
with open("./tests/data/clean.torrent", "a") as f:
f.write("trailing garbage")
p = torrent.TorrentParser('./tests/data/clean.torrent')
self.assertEqual(p.get_meta(), expected)
with self.assertRaises(ValueError):
torrent.TorrentParser('./tests/data/clean.torrent')
with open("./tests/data/clean.torrent", "w") as f:
f.write("i-0e")
with self.assertRaises(ValueError):
torrent.TorrentParser('./tests/data/clean.torrent')
with open("./tests/data/clean.torrent", "w") as f:
f.write("i00e")
with self.assertRaises(ValueError):
torrent.TorrentParser('./tests/data/clean.torrent')
with open("./tests/data/clean.torrent", "w") as f:
f.write("01:AAAAAAAAA")
with self.assertRaises(ValueError):
torrent.TorrentParser('./tests/data/clean.torrent')
with open("./tests/data/clean.torrent", "w") as f:
f.write("1:aaa")
with self.assertRaises(ValueError):
torrent.TorrentParser('./tests/data/clean.torrent')
os.remove('./tests/data/clean.torrent')
def test_odg(self):
@@ -65,23 +166,306 @@ class TestCorruptedFiles(unittest.TestCase):
def test_bmp(self):
shutil.copy('./tests/data/dirty.png', './tests/data/clean.bmp')
harmless.HarmlessParser('./tests/data/clean.bmp')
ret = harmless.HarmlessParser('./tests/data/clean.bmp')
self.assertIsNotNone(ret)
os.remove('./tests/data/clean.bmp')
def test_docx(self):
shutil.copy('./tests/data/dirty.png', './tests/data/clean.docx')
with self.assertRaises(ValueError):
office.MSOfficeParser('./tests/data/clean.docx')
office.MSOfficeParser('./tests/data/clean.docx')
os.remove('./tests/data/clean.docx')
def test_flac(self):
shutil.copy('./tests/data/dirty.png', './tests/data/clean.flac')
with self.assertRaises(ValueError):
audio.FLACParser('./tests/data/clean.flac')
audio.FLACParser('./tests/data/clean.flac')
os.remove('./tests/data/clean.flac')
def test_mp3(self):
shutil.copy('./tests/data/dirty.png', './tests/data/clean.mp3')
with self.assertRaises(ValueError):
audio.MP3Parser('./tests/data/clean.mp3')
audio.MP3Parser('./tests/data/clean.mp3')
os.remove('./tests/data/clean.mp3')
def test_wrong_tif(self):
shutil.copy('./tests/data/dirty.tiff', './tests/data/clean.tif')
p = images.TiffParser('./tests/data/clean.tif')
p.remove_all()
p = images.TiffParser('./tests/data/clean.cleaned.tif')
self.assertEqual(p.get_meta(), {})
os.remove('./tests/data/clean.tif')
os.remove('./tests/data/clean.cleaned.tif')
def test_jpg(self):
shutil.copy('./tests/data/dirty.mp3', './tests/data/clean.jpg')
with self.assertRaises(ValueError):
images.JPGParser('./tests/data/clean.jpg')
os.remove('./tests/data/clean.jpg')
def test_png_lightweight(self):
shutil.copy('./tests/data/dirty.torrent', './tests/data/clean.png')
with self.assertRaises(ValueError):
images.PNGParser('./tests/data/clean.png')
os.remove('./tests/data/clean.png')
def test_avi(self):
try:
video._get_ffmpeg_path()
except RuntimeError:
raise unittest.SkipTest
shutil.copy('./tests/data/dirty.torrent', './tests/data/clean.avi')
p = video.AVIParser('./tests/data/clean.avi')
self.assertFalse(p.remove_all())
os.remove('./tests/data/clean.avi')
def test_avi_injection(self):
try:
video._get_ffmpeg_path()
except RuntimeError:
raise unittest.SkipTest
shutil.copy('./tests/data/dirty.torrent', './tests/data/--output.avi')
p = video.AVIParser('./tests/data/--output.avi')
self.assertFalse(p.remove_all())
os.remove('./tests/data/--output.avi')
def test_zip(self):
with zipfile.ZipFile('./tests/data/clean.zip', 'w') as zout:
zout.write('./tests/data/dirty.flac')
zout.write('./tests/data/dirty.docx')
zout.write('./tests/data/dirty.jpg')
zout.write('./tests/data/embedded_corrupted.docx')
p, mimetype = parser_factory.get_parser('./tests/data/clean.zip')
self.assertEqual(mimetype, 'application/zip')
with self.assertRaises(ValueError):
p.get_meta()
with self.assertRaises(ValueError):
self.assertFalse(p.remove_all())
os.remove('./tests/data/clean.zip')
def test_html(self):
shutil.copy('./tests/data/dirty.html', './tests/data/clean.html')
with open('./tests/data/clean.html', 'a') as f:
f.write('<open>but not</closed>')
with self.assertRaises(ValueError):
web.HTMLParser('./tests/data/clean.html')
os.remove('./tests/data/clean.html')
# Yes, we're able to deal with malformed html :/
shutil.copy('./tests/data/dirty.html', './tests/data/clean.html')
with open('./tests/data/clean.html', 'a') as f:
f.write('<meta name=\'this" is="weird"/>')
p = web.HTMLParser('./tests/data/clean.html')
self.assertTrue(p.remove_all())
p = web.HTMLParser('./tests/data/clean.cleaned.html')
self.assertEqual(p.get_meta(), {})
os.remove('./tests/data/clean.html')
os.remove('./tests/data/clean.cleaned.html')
with open('./tests/data/clean.html', 'w') as f:
f.write('</meta>')
with self.assertRaises(ValueError):
web.HTMLParser('./tests/data/clean.html')
os.remove('./tests/data/clean.html')
with open('./tests/data/clean.html', 'w') as f:
f.write('<meta><a>test</a><set/></meta><title></title><meta>')
p = web.HTMLParser('./tests/data/clean.html')
with self.assertRaises(ValueError):
p.get_meta()
p = web.HTMLParser('./tests/data/clean.html')
with self.assertRaises(ValueError):
p.remove_all()
os.remove('./tests/data/clean.html')
with open('./tests/data/clean.html', 'w') as f:
f.write('<doctitle><br/></doctitle><br/><notclosed>')
p = web.HTMLParser('./tests/data/clean.html')
with self.assertRaises(ValueError):
p.get_meta()
p = web.HTMLParser('./tests/data/clean.html')
with self.assertRaises(ValueError):
p.remove_all()
os.remove('./tests/data/clean.html')
def test_epub(self):
with zipfile.ZipFile('./tests/data/clean.epub', 'w') as zout:
zout.write('./tests/data/dirty.jpg', 'OEBPS/content.opf')
p, mimetype = parser_factory.get_parser('./tests/data/clean.epub')
self.assertEqual(mimetype, 'application/epub+zip')
meta = p.get_meta()
self.assertEqual(meta['OEBPS/content.opf']['OEBPS/content.opf'],
'harmful content')
self.assertFalse(p.remove_all())
os.remove('./tests/data/clean.epub')
def test_tar(self):
with tarfile.TarFile.open('./tests/data/clean.tar', 'w') as zout:
zout.add('./tests/data/dirty.flac')
zout.add('./tests/data/dirty.docx')
zout.add('./tests/data/dirty.jpg')
zout.add('./tests/data/embedded_corrupted.docx')
tarinfo = tarfile.TarInfo(name='./tests/data/dirty.png')
tarinfo.mtime = time.time()
tarinfo.uid = 1337
tarinfo.gid = 1338
tarinfo.size = os.stat('./tests/data/dirty.png').st_size
with open('./tests/data/dirty.png', 'rb') as f:
zout.addfile(tarinfo, f)
p, mimetype = parser_factory.get_parser('./tests/data/clean.tar')
self.assertEqual(mimetype, 'application/x-tar')
with self.assertRaises(ValueError):
p.get_meta()
with self.assertRaises(ValueError):
self.assertFalse(p.remove_all())
os.remove('./tests/data/clean.tar')
shutil.copy('./tests/data/dirty.png', './tests/data/clean.tar')
with self.assertRaises(ValueError):
archive.TarParser('./tests/data/clean.tar')
os.remove('./tests/data/clean.tar')
class TestReadOnlyArchiveMembers(unittest.TestCase):
    """A tar member stored with mode 0o000 can still be cleaned."""

    def test_onlymember_tar(self):
        tar_path = './tests/data/clean.tar'
        cleaned_path = './tests/data/clean.cleaned.tar'
        member_path = './tests/data/dirty.jpg'

        with tarfile.open(tar_path, 'w') as zout:
            zout.add('./tests/data/dirty.png')
            # Hand-built member: no permission bits, custom owner.
            member = tarfile.TarInfo(member_path)
            member.size = os.stat(member_path).st_size
            member.mode = 0o000
            member.uid = 1337
            member.gid = 0
            member.mtime = time.time()
            with open(member_path, 'rb') as f:
                zout.addfile(tarinfo=member, fileobj=f)

        p, mimetype = parser_factory.get_parser(tar_path)
        self.assertEqual(mimetype, 'application/x-tar')
        self.assertEqual(p.get_meta()[member_path]['uid'], '1337')
        self.assertTrue(p.remove_all())

        p = archive.TarParser(cleaned_path)
        self.assertEqual(p.get_meta(), {})

        for leftover in (tar_path, cleaned_path):
            os.remove(leftover)
class TestPathTraversalArchiveMembers(unittest.TestCase):
    """Build tar archives with suspicious members and feed them to TarParser.

    Every test but ``test_tar_symlink_ok`` expects ``archive.TarParser`` to
    raise ``ValueError``. Scratch files are removed through ``addCleanup`` so
    that a failing assertion does not leave artifacts behind (a stale
    symlink or hardlink would make the next run fail on creation).
    """

    @staticmethod
    def _remove_if_present(path):
        """Delete `path`, ignoring the case where it is already gone."""
        try:
            os.remove(path)
        except FileNotFoundError:
            pass

    def _remove_on_exit(self, *paths):
        """Schedule every path in `paths` for deletion when the test ends."""
        for path in paths:
            self.addCleanup(self._remove_if_present, path)

    def _assert_rejected(self, tar_path='./tests/data/clean.tar'):
        """Assert that TarParser raises ValueError for `tar_path`."""
        with self.assertRaises(ValueError):
            archive.TarParser(tar_path)

    def test_tar_traversal(self):
        self._remove_on_exit('./tests/data/clean.tar')
        with tarfile.open('./tests/data/clean.tar', 'w') as zout:
            zout.add('./tests/data/dirty.png')
            tarinfo = tarfile.TarInfo('./tests/data/dirty.jpg')
            tarinfo.name = '../../../../../../../../../../tmp/mat2_test.png'
            with open('./tests/data/dirty.jpg', 'rb') as f:
                zout.addfile(tarinfo=tarinfo, fileobj=f)
        self._assert_rejected()

    def test_tar_absolute_path(self):
        self._remove_on_exit('./tests/data/clean.tar')
        with tarfile.open('./tests/data/clean.tar', 'w') as zout:
            zout.add('./tests/data/dirty.png')
            tarinfo = tarfile.TarInfo('./tests/data/dirty.jpg')
            tarinfo.name = '/etc/passwd'
            with open('./tests/data/dirty.jpg', 'rb') as f:
                zout.addfile(tarinfo=tarinfo, fileobj=f)
        self._assert_rejected()

    def test_tar_duplicate_file(self):
        self._remove_on_exit('./tests/data/clean.tar')
        with tarfile.open('./tests/data/clean.tar', 'w') as zout:
            # The same member name is stored three times.
            for _ in range(3):
                zout.add('./tests/data/dirty.png')
            tarinfo = tarfile.TarInfo('./tests/data/dirty.jpg')
            with open('./tests/data/dirty.jpg', 'rb') as f:
                zout.addfile(tarinfo=tarinfo, fileobj=f)
        self._assert_rejected()

    def test_tar_setuid(self):
        self._remove_on_exit('./tests/data/clean.tar')
        with tarfile.open('./tests/data/clean.tar', 'w') as zout:
            zout.add('./tests/data/dirty.png')
            tarinfo = tarfile.TarInfo('./tests/data/dirty.jpg')
            tarinfo.mode |= stat.S_ISUID
            with open('./tests/data/dirty.jpg', 'rb') as f:
                zout.addfile(tarinfo=tarinfo, fileobj=f)
        self._assert_rejected()

    def test_tar_setgid(self):
        self._remove_on_exit('./tests/data/clean.tar')
        with tarfile.open('./tests/data/clean.tar', 'w') as zout:
            zout.add('./tests/data/dirty.png')
            tarinfo = tarfile.TarInfo('./tests/data/dirty.jpg')
            tarinfo.mode |= stat.S_ISGID
            with open('./tests/data/dirty.jpg', 'rb') as f:
                zout.addfile(tarinfo=tarinfo, fileobj=f)
        self._assert_rejected()

    def test_tar_symlink_absolute(self):
        self._remove_on_exit('./tests/data/clean.tar', './tests/data/symlink')
        os.symlink('/etc/passwd', './tests/data/symlink')
        with tarfile.open('./tests/data/clean.tar', 'w') as zout:
            zout.add('./tests/data/symlink')
            tarinfo = tarfile.TarInfo('./tests/data/symlink')
            tarinfo.linkname = '/etc/passwd'
            tarinfo.type = tarfile.SYMTYPE
            with open('./tests/data/dirty.jpg', 'rb') as f:
                zout.addfile(tarinfo=tarinfo, fileobj=f)
        self._assert_rejected()

    def test_tar_symlink_ok(self):
        self._remove_on_exit('./tests/data/clean.tar', './tests/data/clean.png')
        shutil.copy('./tests/data/dirty.png', './tests/data/clean.png')
        with tarfile.open('./tests/data/clean.tar', 'w') as zout:
            zout.add('./tests/data/dirty.png')
            t = tarfile.TarInfo('mydir')
            t.type = tarfile.DIRTYPE
            zout.addfile(t)
            zout.add('./tests/data/clean.png')
            t = tarfile.TarInfo('mylink')
            t.type = tarfile.SYMTYPE
            t.linkname = './tests/data/clean.png'
            zout.addfile(t)
            zout.add('./tests/data/dirty.jpg')
        # No exception expected: constructing the parser is the whole check.
        archive.TarParser('./tests/data/clean.tar')

    def test_tar_symlink_relative(self):
        self._remove_on_exit('./tests/data/clean.tar', './tests/data/symlink')
        os.symlink('../../../etc/passwd', './tests/data/symlink')
        with tarfile.open('./tests/data/clean.tar', 'w') as zout:
            zout.add('./tests/data/symlink')
            tarinfo = tarfile.TarInfo('./tests/data/symlink')
            with open('./tests/data/dirty.jpg', 'rb') as f:
                zout.addfile(tarinfo=tarinfo, fileobj=f)
        self._assert_rejected()

    def test_tar_device_file(self):
        self._remove_on_exit('./tests/data/clean.tar')
        with tarfile.open('./tests/data/clean.tar', 'w') as zout:
            zout.add('/dev/null')
        self._assert_rejected()

    def test_tar_hardlink(self):
        self._remove_on_exit('./tests/data/cleaner.tar',
                             './tests/data/clean.png',
                             './tests/data/hardlink.png')
        shutil.copy('./tests/data/dirty.png', './tests/data/clean.png')
        os.link('./tests/data/clean.png', './tests/data/hardlink.png')
        with tarfile.open('./tests/data/cleaner.tar', 'w') as zout:
            zout.add('tests/data/clean.png')
            zout.add('tests/data/hardlink.png')
        self._assert_rejected('./tests/data/cleaner.tar')

170
tests/test_deep_cleaning.py Normal file
View File

@@ -0,0 +1,170 @@
#!/usr/bin/env python3
import unittest
import shutil
import os
import zipfile
import tempfile
from libmat2 import office, parser_factory
class TestZipMetadata(unittest.TestCase):
    """Cleaned office documents carry no metadata, at any level."""

    def __check_deep_meta(self, p):
        """Extract `p.filename` and check each parseable member is clean.

        Members for which ``get_parser`` returns no parser are skipped.
        """
        # TemporaryDirectory removes the extraction dir even when an
        # assertion fails; the `with` on ZipFile closes the handle.
        with tempfile.TemporaryDirectory() as tempdir:
            with zipfile.ZipFile(p.filename) as zipin:
                zipin.extractall(tempdir)

            for subdir, _dirs, files in os.walk(tempdir):
                for f in files:
                    complete_path = os.path.join(subdir, f)
                    inside_p, _ = parser_factory.get_parser(complete_path)
                    if inside_p is None:
                        continue
                    self.assertEqual(inside_p.get_meta(), {})

    def __check_zip_meta(self, p):
        """Check the per-member zip headers of `p.filename` are normalised."""
        with zipfile.ZipFile(p.filename) as zipin:
            for item in zipin.infolist():
                self.assertEqual(item.comment, b'')
                self.assertEqual(item.date_time, (1980, 1, 1, 0, 0, 0))
                self.assertEqual(item.create_system, 3)  # 3 is UNIX

    def test_office(self):
        shutil.copy('./tests/data/dirty.docx', './tests/data/clean.docx')
        p = office.MSOfficeParser('./tests/data/clean.docx')

        meta = p.get_meta()
        self.assertIsNotNone(meta)
        self.assertEqual(meta['word/media/image1.png']['Comment'], 'This is a comment, be careful!')

        ret = p.remove_all()
        self.assertTrue(ret)

        p = office.MSOfficeParser('./tests/data/clean.cleaned.docx')
        self.assertEqual(p.get_meta(), {})

        self.__check_zip_meta(p)
        self.__check_deep_meta(p)

        os.remove('./tests/data/clean.docx')
        os.remove('./tests/data/clean.cleaned.docx')

    def test_libreoffice(self):
        shutil.copy('./tests/data/dirty.odt', './tests/data/clean.odt')
        p = office.LibreOfficeParser('./tests/data/clean.odt')

        meta = p.get_meta()
        self.assertIsNotNone(meta)

        ret = p.remove_all()
        self.assertTrue(ret)

        p = office.LibreOfficeParser('./tests/data/clean.cleaned.odt')
        self.assertEqual(p.get_meta(), {})

        self.__check_zip_meta(p)
        self.__check_deep_meta(p)

        os.remove('./tests/data/clean.odt')
        os.remove('./tests/data/clean.cleaned.odt')
class TestZipOrder(unittest.TestCase):
    """Members of a cleaned odt are stored in sorted order."""

    @staticmethod
    def _member_names(path):
        """Return the member names of zip `path`, minus leading 'mimetype'."""
        with zipfile.ZipFile(path) as zin:
            names = [item.filename for item in zin.infolist()]
        start = 0
        while start < len(names) and names[start] == 'mimetype':
            start += 1
        return names[start:]

    def test_libreoffice(self):
        shutil.copy('./tests/data/dirty.odt', './tests/data/clean.odt')
        p = office.LibreOfficeParser('./tests/data/clean.odt')

        meta = p.get_meta()
        self.assertIsNotNone(meta)

        # The dirty sample must be unordered, otherwise this test would
        # prove nothing. Each name is compared with the one right before
        # it; the former loop never advanced `previous_name` and so only
        # compared against the first member.
        names = self._member_names('./tests/data/clean.odt')
        is_unordered = any(later < earlier
                           for earlier, later in zip(names, names[1:]))
        self.assertTrue(is_unordered)

        ret = p.remove_all()
        self.assertTrue(ret)

        names = self._member_names('./tests/data/clean.cleaned.odt')
        for earlier, later in zip(names, names[1:]):
            self.assertGreaterEqual(later, earlier)

        os.remove('./tests/data/clean.odt')
        os.remove('./tests/data/clean.cleaned.odt')
class TestRsidRemoval(unittest.TestCase):
    """Cleaning a docx strips every ``w:rsid`` occurrence from its xml."""

    @staticmethod
    def _count_in_xml_members(path, needle):
        """Return, per ``.xml`` member of zip `path`, how often `needle` occurs.

        Member content is decoded as UTF-8 and lower-cased before counting.
        """
        with zipfile.ZipFile(path) as zin:
            return [zin.read(item).decode('utf-8').lower().count(needle)
                    for item in zin.infolist()
                    if item.filename.endswith('.xml')]

    def test_office(self):
        shutil.copy('./tests/data/office_revision_session_ids.docx', './tests/data/clean.docx')
        p = office.MSOfficeParser('./tests/data/clean.docx')

        meta = p.get_meta()
        self.assertIsNotNone(meta)

        # Integer total (the counter used to start out as the bool False).
        how_many_rsid = sum(self._count_in_xml_members('./tests/data/clean.docx', 'w:rsid'))
        self.assertEqual(how_many_rsid, 11)

        ret = p.remove_all()
        self.assertTrue(ret)

        for num in self._count_in_xml_members('./tests/data/clean.cleaned.docx', 'w:rsid'):
            self.assertEqual(num, 0)

        os.remove('./tests/data/clean.docx')
        os.remove('./tests/data/clean.cleaned.docx')
class TestNsidRemoval(unittest.TestCase):
    """Check that `w:nsid` markers are absent from a cleaned docx."""

    @staticmethod
    def _count_in_xml_members(archive_path, needle):
        """Return the count of *needle* in each `.xml` member of the archive.

        Members are decoded as UTF-8 and lowercased before counting, so
        *needle* must be given in lowercase.
        """
        with zipfile.ZipFile(archive_path) as zin:
            return [
                zin.read(item).decode('utf-8').lower().count(needle)
                for item in zin.infolist()
                if item.filename.endswith('.xml')
            ]

    def test_office(self):
        """Cleaning the fixture leaves no `w:nsid` in any xml member."""
        shutil.copy('./tests/data/dirty_with_nsid.docx', './tests/data/clean.docx')
        p = office.MSOfficeParser('./tests/data/clean.docx')

        meta = p.get_meta()
        self.assertIsNotNone(meta)

        # NOTE(review): this pre-check counts 'w:rsid' while the post-clean
        # check below looks for 'w:nsid'. The needle is left unchanged because
        # the expected value (1190) is tied to it; confirm this is intended.
        rsid_total = sum(
            self._count_in_xml_members('./tests/data/clean.docx', 'w:rsid')
        )
        self.assertEqual(rsid_total, 1190)

        ret = p.remove_all()
        self.assertTrue(ret)

        # Every xml member of the cleaned file must be free of the marker.
        for num in self._count_in_xml_members('./tests/data/clean.cleaned.docx', 'w:nsid'):
            self.assertEqual(num, 0)

        os.remove('./tests/data/clean.docx')
        os.remove('./tests/data/clean.cleaned.docx')

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,80 @@
#!/usr/bin/env python3
import unittest
import shutil
import os
from libmat2 import pdf, images, torrent
class TestLightWeightCleaning(unittest.TestCase):
    """Exercise several parsers with `lightweight_cleaning` switched on."""

    # One entry per fixture file `./tests/data/dirty.<name>`:
    #   parser        -- parser class, instantiated with the file path
    #   meta          -- key/value pairs that must be reported before cleaning
    #   expected_meta -- exact metadata expected from the cleaned file
    data = [
        {
            'name': 'pdf',
            'parser': pdf.PDFParser,
            'meta': {'producer': 'pdfTeX-1.40.14'},
            'expected_meta': {'creation-date': -1, 'format': 'PDF-1.5', 'mod-date': -1},
        },
        {
            'name': 'png',
            'parser': images.PNGParser,
            'meta': {'Comment': 'This is a comment, be careful!'},
            'expected_meta': {},
        },
        {
            'name': 'jpg',
            'parser': images.JPGParser,
            'meta': {'Comment': 'Created with GIMP'},
            'expected_meta': {},
        },
        # NOTE: the webp case is currently disabled.
        # {
        #     'name': 'webp',
        #     'parser': images.WEBPParser,
        #     'meta': {'Warning': '[minor] Improper EXIF header'},
        #     'expected_meta': {},
        # },
        {
            'name': 'torrent',
            'parser': torrent.TorrentParser,
            'meta': {'created by': b'mktorrent 1.0'},
            'expected_meta': {},
        },
        {
            'name': 'tiff',
            'parser': images.TiffParser,
            'meta': {'ImageDescription': 'OLYMPUS DIGITAL CAMERA '},
            'expected_meta': {
                'ResolutionUnit': 'inches',
                'XResolution': 72,
                'YResolution': 72,
            },
        },
    ]

    def test_all(self):
        """Clean every fixture in `data` and compare the resulting metadata."""
        for case in self.data:
            # Run each fixture as its own sub-test so that a failure names
            # the offending case and does not hide the remaining ones.
            with self.subTest(case=case['name']):
                target = './tests/data/clean.' + case['name']
                shutil.copy('./tests/data/dirty.' + case['name'], target)
                p1 = case['parser'](target)

                # The dirty copy must expose the metadata we expect to remove.
                meta = p1.get_meta()
                for k, v in case['meta'].items():
                    self.assertEqual(meta[k], v)

                p1.lightweight_cleaning = True
                self.assertTrue(p1.remove_all())

                # Re-parse the output file to see what survived the cleaning.
                p2 = case['parser'](p1.output_filename)
                self.assertEqual(p2.get_meta(), case['expected_meta'])

                os.remove(target)
                os.remove(p1.output_filename)

    def test_exiftool_overwrite(self):
        """Cleaning succeeds when the output file already exists."""
        target = './tests/data/clean.png'
        shutil.copy('./tests/data/dirty.png', target)

        p1 = images.PNGParser(target)
        p1.lightweight_cleaning = True
        # Put a dirty file at the output path before cleaning; the empty
        # metadata asserted below shows that it has been replaced.
        shutil.copy('./tests/data/dirty.png', p1.output_filename)
        self.assertTrue(p1.remove_all())

        p2 = images.PNGParser(p1.output_filename)
        self.assertEqual(p2.get_meta(), {})

        os.remove(target)
        os.remove(p1.output_filename)

32
tests/test_policy.py Normal file
View File

@@ -0,0 +1,32 @@
#!/usr/bin/env python3
import unittest
import shutil
import os
from libmat2 import office, UnknownMemberPolicy
class TestPolicy(unittest.TestCase):
    """Exercise `unknown_member_policy` on a docx fixture."""

    # Working copy of the fixture; every test creates and removes it.
    target = './tests/data/clean.docx'

    def test_policy_omit(self):
        """Cleaning succeeds with the OMIT policy."""
        shutil.copy('./tests/data/embedded.docx', self.target)
        p = office.MSOfficeParser(self.target)
        p.unknown_member_policy = UnknownMemberPolicy.OMIT
        self.assertTrue(p.remove_all())
        os.remove(p.filename)
        # Remove the cleaned output as well, as test_policy_keep does, so
        # that no file is left behind in the fixture directory.
        os.remove(p.output_filename)

    def test_policy_keep(self):
        """Cleaning succeeds with the KEEP policy."""
        shutil.copy('./tests/data/embedded.docx', self.target)
        p = office.MSOfficeParser(self.target)
        p.unknown_member_policy = UnknownMemberPolicy.KEEP
        self.assertTrue(p.remove_all())
        os.remove(p.filename)
        os.remove(p.output_filename)

    def test_policy_unknown(self):
        """Building a policy from an unknown name raises ValueError."""
        shutil.copy('./tests/data/embedded.docx', self.target)
        p = office.MSOfficeParser(self.target)
        with self.assertRaises(ValueError):
            p.unknown_member_policy = UnknownMemberPolicy('unknown_policy_name_totally_invalid')
        # remove_all() is never called here, so only the input copy exists.
        os.remove(p.filename)