Compare commits
482 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
235403bc11 | ||
|
102f08cd28 | ||
|
7a8ea224bc | ||
|
504efb2448 | ||
|
f07344444d | ||
|
473903b70e | ||
|
1438cf7bd4 | ||
|
e740a9559f | ||
|
2b58eece50 | ||
|
29f404bce3 | ||
|
6c966f2afa | ||
|
70d236a062 | ||
|
d61fb7f77a | ||
|
1aed4ff2a5 | ||
|
75c0a750c1 | ||
|
a47ac01eb6 | ||
|
156855ab7e | ||
|
09672a2dcc | ||
|
f2c898c92d | ||
|
f931a0ecee | ||
|
61f39c4bd0 | ||
|
1b9ce34e2c | ||
|
17e76ab6f0 | ||
|
94ef57c994 | ||
|
05d1ca5841 | ||
|
55b468ded7 | ||
|
0fcafa2edd | ||
|
7405955ab5 | ||
|
e6564509e1 | ||
|
bbd5b2817c | ||
|
73f2a87aa0 | ||
|
abcdf07ef4 | ||
|
a3081bce47 | ||
|
47d5529840 | ||
|
fa44794dfd | ||
|
04786d75da | ||
|
cb7b5747a8 | ||
|
8c26020f67 | ||
|
a0c97b25c4 | ||
|
1bcb945360 | ||
|
9159fe8705 | ||
|
1b9608aecf | ||
|
2ac8c24dac | ||
|
71ecac85b0 | ||
|
b9677d8655 | ||
|
6fde80d3e3 | ||
|
6c05360afa | ||
|
596696dfbc | ||
|
daa17a3e9c | ||
|
6061f47231 | ||
|
8b41764a3e | ||
|
ed0ffa5693 | ||
|
b1c03bce72 | ||
|
a63011b3f6 | ||
|
e41390eb64 | ||
|
66a36f6b15 | ||
|
3cb3f58084 | ||
|
39fb254e01 | ||
|
1f73a16ef3 | ||
|
e8b38f1101 | ||
|
8d7230ba16 | ||
|
2b02c82e7f | ||
|
b00e221675 | ||
|
62a45c29df | ||
|
6479d869e4 | ||
|
29057d6cdf | ||
|
180ea24e5a | ||
|
618e0a8e39 | ||
|
6d93cf9397 | ||
|
b1a16b334f | ||
|
0501359600 | ||
|
cc5be8608b | ||
|
292f44c086 | ||
|
2dd196c2c7 | ||
|
34eb878aae | ||
|
eec5c33a6b | ||
|
beebca4bf1 | ||
|
e2c4dbf721 | ||
|
704367f91e | ||
|
2639713709 | ||
|
b18e6e11f0 | ||
|
62dc8c71c1 | ||
|
697e9583b9 | ||
|
1b37604d3a | ||
|
1c3e2afa1e | ||
|
05b8e97b68 | ||
|
2a74a400e2 | ||
|
5ccddae7f5 | ||
|
12582ba2f5 | ||
|
35092562e6 | ||
|
e5dcd39225 | ||
|
660f0dea73 | ||
|
cd2b9af902 | ||
|
3378f3ab8c | ||
|
48680b9852 | ||
|
d555a02c90 | ||
|
143bb0a5f3 | ||
|
a1a7c76dc9 | ||
|
01b39aa68c | ||
|
e312868c4e | ||
|
b71bafd2cf | ||
|
22199df4d0 | ||
|
1703ed6ebb | ||
|
541b3c83b2 | ||
|
6afb0cb9d8 | ||
|
1c4e98425a | ||
|
fb7440ab5d | ||
|
0c91ac7367 | ||
|
708841f9f5 | ||
|
d4479d9baa | ||
|
08a5792a9a | ||
|
3b094ae449 | ||
|
0b094b594b | ||
|
8c1107c358 | ||
|
6df615281b | ||
|
49c8b14e59 | ||
|
bf0c777cb9 | ||
|
682552d152 | ||
|
c9be50f968 | ||
|
2eec653e99 | ||
|
85c08c5b68 | ||
|
c5841a241d | ||
|
d00ca800b2 | ||
|
8b42b28b70 | ||
|
e2362b8620 | ||
|
626669f95f | ||
|
497f5f71fc | ||
|
cd5f2eb71c | ||
|
ec082d6483 | ||
|
f8111547ae | ||
|
88fa71fbde | ||
|
6cd28ed46c | ||
|
92dcc8175d | ||
|
7131aa6fd7 | ||
|
7ce2b5121b | ||
|
a517f8d36e | ||
|
61dce89fbd | ||
|
88b7ec2c48 | ||
|
8bea98911e | ||
|
62ec8f6c1e | ||
|
148bcbba52 | ||
|
b3def8b5de | ||
|
77dde8a049 | ||
|
1b361ec27e | ||
|
58a1563a99 | ||
|
f638168033 | ||
|
b84f73c5c3 | ||
|
96e639dfd3 | ||
|
46b3ae1672 | ||
|
d0bc79442b | ||
|
17919c73a9 | ||
|
60d820b053 | ||
|
461534a966 | ||
|
d8b68ef68e | ||
|
c8dc020dc5 | ||
|
599909a760 | ||
|
d008b1e2f0 | ||
|
d7a03d907b | ||
|
a23dc001cd | ||
|
f93df85d03 | ||
|
e5b1068ed6 | ||
|
843c0d8cc5 | ||
|
56d2c4aa5f | ||
|
12f23e0150 | ||
|
72f41c5e05 | ||
|
5270071b94 | ||
|
5312603a88 | ||
|
ebe06cb8a9 | ||
|
6dd48de4ef | ||
|
e0f4f0e302 | ||
|
4acf3af002 | ||
|
ee704db2ff | ||
|
693408f1a6 | ||
|
0902e9e330 | ||
|
b2efffdaa4 | ||
|
7465cedee7 | ||
|
f5aef1b391 | ||
|
2e3496d3d4 | ||
|
be24c681ff | ||
|
efa525c102 | ||
|
f67cd9d7dc | ||
|
615997be38 | ||
|
4ba4b143e6 | ||
|
8c7b23be90 | ||
|
db797e3a52 | ||
|
da182dc2f8 | ||
|
e4114af3b5 | ||
|
d56f83bed1 | ||
|
697cb36b81 | ||
|
6e52661cfb | ||
|
03f5129968 | ||
|
deeee256cc | ||
|
df1eb98a40 | ||
|
ada53cb9c6 | ||
|
655c19d17d | ||
|
a389cc760a | ||
|
4034cf9a1a | ||
|
5f0b3beb46 | ||
|
3cef7fe7fc | ||
|
6d19a20935 | ||
|
12489bb682 | ||
|
bb903ec309 | ||
|
893faa6604 | ||
|
4483c06f19 | ||
|
58773088ac | ||
|
3714553185 | ||
|
1678d37856 | ||
|
397a18b0cc | ||
|
fc924239fe | ||
|
0170f0e37e | ||
|
0cf0541ad9 | ||
|
40669186c9 | ||
|
d76a6cbb18 | ||
|
49e0c43ac5 | ||
|
0c75cd15dc | ||
|
5280b6c2b3 | ||
|
a81ea65d44 | ||
|
8bb2826f7a | ||
|
5c33b290ae | ||
|
00d728f6cc | ||
|
65cfd110f9 | ||
|
1f830bf8ad | ||
|
d027008e46 | ||
|
1163bdd991 | ||
|
1be0a4eefb | ||
|
dc5603eb1d | ||
|
4999209f9c | ||
|
bdd5581033 | ||
|
47f9cb33bf | ||
|
b784a9fc7f | ||
|
88b95923ab | ||
|
13d71a2565 | ||
|
35d550d229 | ||
|
aa52a5c91c | ||
|
f19f6ed8b6 | ||
|
51ab2db279 | ||
|
ef665e6dc1 | ||
|
aa0ff643c4 | ||
|
dd9ead4ebe | ||
|
d0ab2c3023 | ||
|
fe1950ac3e | ||
|
97abafdc58 | ||
|
f1a06e805b | ||
|
4f0e0685ca | ||
|
911d822c44 | ||
|
7e031c9757 | ||
|
9516990693 | ||
|
a7ebb587e1 | ||
|
14a4cddb8b | ||
|
8e41b098d6 | ||
|
82cc822a1d | ||
|
20ed5eb7d6 | ||
|
05f429b197 | ||
|
74afa885f5 | ||
|
1e325c5b5b | ||
|
6c7dc4fada | ||
|
1c79aa951e | ||
|
d454ef5b8e | ||
|
c824a68dd8 | ||
|
c8602b8c7e | ||
|
b4b150a4f5 | ||
|
51ff89c512 | ||
|
b8c92fec09 | ||
|
2405df0469 | ||
|
0e3c2c9b1b | ||
|
2dc097baf3 | ||
|
e40eb92b55 | ||
|
a5a3e4677f | ||
|
adf7adf854 | ||
|
2b4f2199e4 | ||
|
1327089a30 | ||
|
459ed07443 | ||
|
32ca58ef82 | ||
|
6b39edc3f2 | ||
|
18570813c9 | ||
|
5ac91cd4f9 | ||
|
c3f097a82b | ||
|
cb8a016319 | ||
|
55214206b5 | ||
|
73d2966e8c | ||
|
eb2e702f37 | ||
|
545dccc352 | ||
|
524bae5972 | ||
|
c757a9b7ef | ||
|
dda30c48b7 | ||
|
8542e650ec | ||
|
02ff21b158 | ||
|
6b45064c78 | ||
|
a81b7658a8 | ||
|
6e63e03b86 | ||
|
a71488d459 | ||
|
6ef6aaa222 | ||
|
6cc034e81b | ||
|
e1dd439fc8 | ||
|
b9a62d798a | ||
|
54e50450ad | ||
|
433609f8ea | ||
|
e8c1bb0e3c | ||
|
8b5d0c286c | ||
|
8e84ba547a | ||
|
812bf2553b | ||
|
94cdca1ed2 | ||
|
b755aba8ea | ||
|
edce78859b | ||
|
0ab17b973b | ||
|
389311475c | ||
|
505be24be9 | ||
|
ef8265e86a | ||
|
1d75451b77 | ||
|
dc35ef56c8 | ||
|
3aa76cc58e | ||
|
8ff57c5803 | ||
|
04bb8c8ccf | ||
|
3a070b0ab7 | ||
|
283e5e5787 | ||
|
513d897ea0 | ||
|
5a9dc388ad | ||
|
5a08f5b7bf | ||
|
fe885babee | ||
|
1040a594d6 | ||
|
e510a225e3 | ||
|
a98962a0fa | ||
|
9a81b3adfd | ||
|
f1a071d460 | ||
|
38df679a88 | ||
|
44f267a596 | ||
|
5bc88faedf | ||
|
83389a63e9 | ||
|
e70ea811c9 | ||
|
2ae5d909c3 | ||
|
5896387ade | ||
|
d4c050a738 | ||
|
f04d4b28fc | ||
|
da88d30689 | ||
|
f1552b2ccb | ||
|
2ba38dd2a1 | ||
|
b832a59414 | ||
|
6ce88b8b7f | ||
|
2444caccc0 | ||
|
b9dbd12ef9 | ||
|
b2e153b69c | ||
|
35dca4bf1c | ||
|
4ed30b5e00 | ||
|
0d25b18d26 | ||
|
d0f3534eff | ||
|
8675706c93 | ||
|
5e196ecef8 | ||
|
8e98593b02 | ||
|
df252fd71a | ||
|
a1c39104fc | ||
|
34fbd633fd | ||
|
f1ceed13b5 | ||
|
5a5c642a46 | ||
|
84e302ac93 | ||
|
7901fdef2e | ||
|
1b356b8c6f | ||
|
c67bbafb2c | ||
|
5b606f939d | ||
|
156e81fb4c | ||
|
9578e4b4ee | ||
|
a46a7eb6fa | ||
|
a24c59b208 | ||
|
652b8e519f | ||
|
c14be47f95 | ||
|
81a3881aa4 | ||
|
e342671ead | ||
|
212d9c472c | ||
|
a88107c9ca | ||
|
7f629ed2e3 | ||
|
719cdf20fa | ||
|
2e243355f5 | ||
|
174d4a0ac0 | ||
|
fbcf68c280 | ||
|
9826de3526 | ||
|
ab71c29a28 | ||
|
3d2842802c | ||
|
a1a06d023e | ||
|
9275d64be5 | ||
|
0a2a398c9c | ||
|
5cf94bd256 | ||
|
de65f4f4d4 | ||
|
759efa03ee | ||
|
9fe6f1023b | ||
|
e3d817f57e | ||
|
2e9adab86a | ||
|
c8c27dcf38 | ||
|
120b204988 | ||
|
f3cef319b9 | ||
|
2d9ba81a84 | ||
|
072ee1814d | ||
|
3649c0ccaf | ||
|
119085f28d | ||
|
e515d907d7 | ||
|
46bb1b83ea | ||
|
1d7e374e5b | ||
|
915dc634c4 | ||
|
10d60bd398 | ||
|
4192a2daa3 | ||
|
9ce458cb3b | ||
|
907fc591cc | ||
|
8255293d1d | ||
|
6b7e8ad8c0 | ||
|
b7a8622682 | ||
|
3e2890eb9e | ||
|
91e80527fc | ||
|
7877ba0da5 | ||
|
e2634f7a50 | ||
|
aba9b72d2c | ||
|
15dd3d84ff | ||
|
588466f4a8 | ||
|
cf89ff45c2 | ||
|
f583d12564 | ||
|
1c72448e58 | ||
|
f068621628 | ||
|
fe09d81ab1 | ||
|
5be66dbe91 | ||
|
ee496cfa7f | ||
|
6e2e411a2a | ||
|
2ce1dc793e | ||
|
e27768824a | ||
|
36c5bad140 | ||
|
b5a9520a60 | ||
|
a1257c538b | ||
|
6d8e999f12 | ||
|
1bc4c7aac9 | ||
|
03245a8731 | ||
|
27445e9134 | ||
|
b32ba9f736 | ||
|
e9f28edf73 | ||
|
7697f9c085 | ||
|
e920083559 | ||
|
71b1ced842 | ||
|
942859601d | ||
|
565cb66d14 | ||
|
052a356750 | ||
|
2f670651cf | ||
|
0cd510938a | ||
|
dc026f99ad | ||
|
0aac0d644d | ||
|
17e69b6005 | ||
|
cf5f3b268d | ||
|
a5eede9a21 | ||
|
926e8dac5f | ||
|
edc5f86552 | ||
|
84d50f97c0 | ||
|
8093dce88e | ||
|
5a7c7f35f7 | ||
|
d5861e4653 | ||
|
22e3918f67 | ||
|
080d6769ca | ||
|
86fe3aa584 | ||
|
cc327b1592 | ||
|
b4edd6d2a2 | ||
|
bd357b85f8 | ||
|
8c21006e6c | ||
|
f49aa5cab7 | ||
|
52a2c800b7 | ||
|
ad3e7ccee8 | ||
|
ca01484126 | ||
|
f9bc022c96 | ||
|
72e1fda18d | ||
|
3cd4f9111f | ||
|
b5fcddd6a6 | ||
|
7ea362d908 | ||
|
85455a4419 | ||
|
9f631a1bb1 | ||
|
c2ef35d1f1 | ||
|
3d80f97524 | ||
|
53271495f7 | ||
|
0638b9bbbb | ||
|
893f58554a | ||
|
11008f8fd4 | ||
|
a430403c7e | ||
|
bee56a57ce | ||
|
02f7605ac1 | ||
|
80fc4ffb40 | ||
|
177184ac67 | ||
|
f44769df41 | ||
|
1e9906de29 | ||
|
63b19416ef | ||
|
74f2d50433 | ||
|
b4ef0c9622 |
45
.github/workflows/builds.yaml
vendored
Normal file
@@ -0,0 +1,45 @@
|
||||
name: CI for Python versions
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
schedule:
|
||||
- cron: '0 16 * * 5'
|
||||
|
||||
jobs:
|
||||
linting:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/setup-python@v5
|
||||
- run: pip install ruff
|
||||
- run: |
|
||||
ruff check .
|
||||
build:
|
||||
needs: linting
|
||||
runs-on: ubuntu-latest
|
||||
strategy:
|
||||
matrix:
|
||||
python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13", "3.14.0-rc.2"]
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- name: Setup Python
|
||||
uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
sudo apt-get install --no-install-recommends --no-install-suggests --yes \
|
||||
ffmpeg \
|
||||
gir1.2-gdkpixbuf-2.0 \
|
||||
gir1.2-poppler-0.18 \
|
||||
gir1.2-rsvg-2.0 \
|
||||
libimage-exiftool-perl \
|
||||
python3-gi-cairo \
|
||||
libcairo2-dev \
|
||||
libgirepository-2.0-dev \
|
||||
libgirepository1.0-dev \
|
||||
gobject-introspection \
|
||||
python3-mutagen
|
||||
pip install .
|
||||
- name: Build and run the testsuite
|
||||
run: python3 -m unittest discover -v
|
4
.gitignore
vendored
@@ -1,5 +1,9 @@
|
||||
.*
|
||||
*.pyc
|
||||
.coverage
|
||||
.eggs
|
||||
.mypy_cache/
|
||||
build
|
||||
dist
|
||||
mat2.egg-info
|
||||
tags
|
||||
|
@@ -1,46 +0,0 @@
|
||||
image: debian
|
||||
|
||||
stages:
|
||||
- linting
|
||||
- test
|
||||
|
||||
bandit:
|
||||
stage: linting
|
||||
script:
|
||||
- apt-get -qqy update
|
||||
- apt-get -qqy install --no-install-recommends python3-bandit
|
||||
- bandit -r ./libmat2 --format txt --skip B101,B404,B603
|
||||
|
||||
pyflakes:
|
||||
stage: linting
|
||||
script:
|
||||
- apt-get -qqy update
|
||||
- apt-get -qqy install --no-install-recommends pyflakes3
|
||||
- pyflakes3 ./libmat2
|
||||
|
||||
mypy:
|
||||
stage: linting
|
||||
script:
|
||||
- apt-get -qqy update
|
||||
- apt-get -qqy install --no-install-recommends python3-pip
|
||||
- pip3 install mypy
|
||||
- mypy mat2 libmat2/*.py --ignore-missing-imports
|
||||
|
||||
tests:debian:
|
||||
stage: test
|
||||
script:
|
||||
- apt-get -qqy update
|
||||
- apt-get -qqy install --no-install-recommends python3-mutagen python3-gi-cairo gir1.2-poppler-0.18 gir1.2-gdkpixbuf-2.0 libimage-exiftool-perl python3-coverage
|
||||
- python3-coverage run --branch -m unittest discover -s tests/
|
||||
- python3-coverage report -m --include 'libmat2/*'
|
||||
|
||||
tests:fedora:
|
||||
image: fedora
|
||||
stage: test
|
||||
script:
|
||||
- dnf install -y python3 python3-mutagen python3-gobject
|
||||
- dnf install -y gdk-pixbuf2 poppler-glib gdk-pixbuf2 gdk-pixbuf2-modules
|
||||
- dnf install -y cairo-gobject cairo python3-cairo
|
||||
- dnf install -y perl-Image-ExifTool mailcap
|
||||
- gdk-pixbuf-query-loaders-64 > /usr/lib64/gdk-pixbuf-2.0/2.10.0/loaders.cache
|
||||
- python3 setup.py test
|
5
.mailmap
Normal file
@@ -0,0 +1,5 @@
|
||||
Julien (jvoisin) Voisin <julien.voisin+mat2@dustri.org> totallylegit <totallylegit@dustri.org>
|
||||
Julien (jvoisin) Voisin <julien.voisin+mat2@dustri.org> jvoisin <julien.voisin@dustri.org>
|
||||
Julien (jvoisin) Voisin <julien.voisin+mat2@dustri.org> jvoisin <jvoisin@riseup.net>
|
||||
|
||||
Daniel Kahn Gillmor <dkg@fifthhorseman.net> dkg <dkg@fifthhorseman.net>
|
218
CHANGELOG.md
@@ -1,10 +1,224 @@
|
||||
# 0.13.5 - 2025-01-09
|
||||
- Keep orientation metadata on jpeg and tiff files
|
||||
- Improve cairo-related error/exceptions handling
|
||||
- Improve the logging
|
||||
- Improve the sandboxing
|
||||
- Improve Python3.12 support
|
||||
- Improve MSOffice documents handling
|
||||
|
||||
# 0.13.4 - 2023-08-02
|
||||
|
||||
- Add documentation about mat2 on OSX
|
||||
- Make use of python3.7 constructs to simplify code
|
||||
- Use more modern type annotations
|
||||
- Harden get_meta in archive.py against variants of CVE-2022-35410
|
||||
- Improve MSOffice document support
|
||||
- Package the manpage on pypi
|
||||
|
||||
# 0.13.3 - 2023-02-23
|
||||
|
||||
- Fix a decorator argument
|
||||
|
||||
# 0.13.2 - 2023-01-28
|
||||
|
||||
- Fix a crash on some python versions
|
||||
|
||||
# 0.13.1 - 2023-01-07
|
||||
|
||||
- Improve xlsx support
|
||||
- Remove the Nautilus extension
|
||||
|
||||
# 0.13.0 - 2022-07-06
|
||||
|
||||
- Fix an arbitrary file read (CVE-2022-35410)
|
||||
- Add support for heic files
|
||||
|
||||
# 0.12.4 - 2022-04-30
|
||||
|
||||
- Fix possible errors/crashes when processing multiple files
|
||||
via the command line interface
|
||||
- Use a fixed PDF version for the output
|
||||
- Improve compatibility with modern versions of rsvg
|
||||
- Improve the robustness of the command line interface with
|
||||
regard to control characters
|
||||
|
||||
# 0.12.3 - 2022-01-06
|
||||
|
||||
- Implement code for internationalization
|
||||
- Keep individual files compression type in zip files
|
||||
- Increase the robustness of mat2 against weird/corrupted files
|
||||
- Fix the dolphin integration
|
||||
- Add a fuzzer
|
||||
|
||||
# 0.12.2 - 2021-08-29
|
||||
|
||||
- Add support for aiff files
|
||||
- Improve MS Office support
|
||||
- Improve compatibility with newer/older version of mat2's dependencies
|
||||
- Fix possible issues with the resolution of processed pdf
|
||||
|
||||
# 0.12.1 - 2021-03-19
|
||||
|
||||
- Improve epub support
|
||||
- Improve MS Office support
|
||||
|
||||
# 0.12.0 - 2020-12-18
|
||||
|
||||
- Improve significantly MS Office formats support
|
||||
- Fix some typos in the Nautilus extension
|
||||
- Improve reliability of the mp3, pdf and svg parsers
|
||||
- Improve compatibility with ffmpeg when sandboxing is used
|
||||
- Improve the dolphin extension usability
|
||||
- libmat2 now raises a ValueError on malformed files while trying to
|
||||
find the right parser, instead of returning None
|
||||
|
||||
# 0.11.0 - 2020-03-29
|
||||
|
||||
- Improve significantly MS Office formats support
|
||||
- Refactor how mat2 looks for executables
|
||||
|
||||
# 0.10.1 - 2020-02-09
|
||||
|
||||
- Improve the documentation and the manpage
|
||||
- Improve the robustness of css, html, png, gdk-based, exiftool-based parsers
|
||||
- Future-proof a bit the testsuite
|
||||
- Handle tiff files with a .tif extension
|
||||
- Improve the sandbox' usability
|
||||
- Add support for wav files
|
||||
|
||||
# 0.10.0 - 2019-11-30
|
||||
|
||||
- Make mat2 work on Python3.8
|
||||
- Minor improvement of ppt handling
|
||||
- Minor improvement of odt handling
|
||||
- Add an integration for KDE's file manager: Dolphin
|
||||
- mat2 now copies file permissions on the cleaned files
|
||||
- Add a flag to disable sandboxing
|
||||
- Tighten a bit the sandboxing
|
||||
- Improve handling of MSOffice documents
|
||||
- Add support for inplace cleaning
|
||||
- Better handling of mutually-exclusive arguments in the command line
|
||||
- Add support for svg
|
||||
- Add support for ppm
|
||||
- Cleaned zip files are compressed by default
|
||||
- Minor performances improvement when dealing with images/video files
|
||||
- Better handling of optional dependencies
|
||||
|
||||
# 0.9.0 - 2019-05-10
|
||||
|
||||
- Add tar/tar.gz/tar.bz2/tar.xz archives support
|
||||
- Add support for xhtml files
|
||||
- Improve handling of read-only files
|
||||
- Improve a bit the command line's documentation
|
||||
- Fix a confusing error message
|
||||
- Add even more tests
|
||||
- Usual internal cleanups/refactorings
|
||||
|
||||
# 0.8.0 - 2019-02-28
|
||||
|
||||
- Add support for epub files
|
||||
- Fix the setup.py file crashing on non-utf8 platforms
|
||||
- Improve css support
|
||||
- Improve html support
|
||||
|
||||
# 0.7.0 - 2019-02-17
|
||||
|
||||
- Add support for wmv files
|
||||
- Add support for gif files
|
||||
- Add support for html files
|
||||
- Sandbox external processes via bubblewrap
|
||||
- Simplify archive-based formats processing
|
||||
- The Nautilus extension now plays nicer with other extensions
|
||||
|
||||
# 0.6.0 - 2018-11-10
|
||||
|
||||
- Add lightweight cleaning for jpeg
|
||||
- Add support for zip files
|
||||
- Add support for mp4 files
|
||||
- Improve metadata extraction for archives
|
||||
- Improve robustness against corrupted embedded files
|
||||
- Fix a possible security issue on some terminals (control character
|
||||
injection via --show)
|
||||
- Various internal cleanup/improvements
|
||||
|
||||
# 0.5.0 - 2018-10-23
|
||||
|
||||
- Video (.avi files for now) support, via FFmpeg, optionally
|
||||
- Lightweight cleaning for png and tiff files
|
||||
- Processing files starting with a dash is now quicker
|
||||
- Metadata are now displayed sorted
|
||||
- Recursive metadata support for FLAC files
|
||||
- Unsupported extensions aren't displayed in `./mat2 -l` anymore
|
||||
- Improve the display when no metadata are found
|
||||
- Update the logo according to the GNOME guidelines
|
||||
- The testsuite is now runnable on the installed version of mat2
|
||||
- Various internal cleanup/improvements
|
||||
|
||||
# 0.4.0 - 2018-10-03
|
||||
|
||||
- There is now a policy, for advanced users, to deal with unknown embedded fileformats
|
||||
- Improve the documentation
|
||||
- Various minor refactoring
|
||||
- Improve how corrupted PNG are handled
|
||||
- Dangerous/advanced cli's options no longer have short versions
|
||||
- Significant improvements to office files anonymisation
|
||||
- Archive members are sorted lexicographically
|
||||
- XML attributes are sorted lexicographically too
|
||||
- RSID are now stripped
|
||||
- Dangling references in [Content_types].xml are now removed
|
||||
- Significant improvements to office files support
|
||||
- Anonymised office files can now be opened by MS Office without warnings
|
||||
- The CLI isn't threaded anymore, for it was causing issues
|
||||
- Various misc typo fix
|
||||
|
||||
# 0.3.1 - 2018-09-01
|
||||
|
||||
- Document how to install mat2 for various distributions
|
||||
- Fix various typos in the documentation/comments
|
||||
- Add ArchLinux to the CI to ensure that mat2 is running on it
|
||||
- Fix the handling of files with a name ending in `.JPG`
|
||||
- Improve the detection of unsupported extensions in upper-case
|
||||
- Streamline mat2's logging
|
||||
|
||||
|
||||
# 0.3.0 - 2018-08-03
|
||||
|
||||
- Add a check for missing dependencies
|
||||
- Add Nautilus extension
|
||||
- Minor code simplifications
|
||||
- Improve our linters' coverage
|
||||
- Add a manpage
|
||||
- Add folder/multiple files related tests
|
||||
- Change the logo
|
||||
|
||||
|
||||
# 0.2.0 - 2018-07-10
|
||||
|
||||
- Fix various crashes due to malformed files
|
||||
- Simplify various code-paths
|
||||
- Remove superfluous debug message
|
||||
- Remove the `--check` option that never was implemented anyway
|
||||
- Add a `-c` option to check for mat2's dependencies
|
||||
|
||||
|
||||
# 0.1.3 - 2018-07-06
|
||||
|
||||
- Improve mat2 resilience against corrupted images
|
||||
- Check that the minimal version of Poppler is available
|
||||
- Simplify how mat2 deals with office files
|
||||
- Improve cleaning of office files
|
||||
- Thumbnails are removed
|
||||
- Revisions are removed
|
||||
- Add support for plain text files
|
||||
|
||||
|
||||
# 0.1.2 - 2018-06-21
|
||||
|
||||
- Rename some files to ease the packaging
|
||||
- Add linters to the CI (mypy, bandit and pyflakes)
|
||||
- Prevent exiftool-related parameter injections
|
||||
- Improve MAT2's resilience against corrupted files
|
||||
- Make MAT2 work on fedora, thanks to @atenart
|
||||
- Improve mat2's resilience against corrupted files
|
||||
- Make mat2 work on fedora, thanks to @atenart
|
||||
- Tighten the threat model
|
||||
- Simplify and improve how office files are handled
|
||||
|
||||
|
@@ -1,20 +1,45 @@
|
||||
# Contributing to MAT2
|
||||
# Contributing to mat2
|
||||
|
||||
The main repository for MAT2 is on [0xacab]( https://0xacab.org/jvoisin/mat2 ),
|
||||
with a mirror on [gitlab.com]( https://gitlab.com/jvoisin/mat2 ).
|
||||
The main repository for mat2 is on [github]( https://github.com/jvoisin/mat2 ),
|
||||
but you can send patches to jvoisin by [email](https://dustri.org/) if you prefer.
|
||||
|
||||
Do feel free to pick up [an issue]( https://0xacab.org/jvoisin/mat2/issues )
|
||||
and to send a pull-request. Please do check that everything is fine by running the
|
||||
testsuite with `python3 -m unittest discover -v` before submitting one :)
|
||||
Do feel free to pick up [an issue]( https://github.com/jvoisin/mat2/issues )
|
||||
and to send a pull-request.
|
||||
|
||||
Before sending the pull-request, please do check that everything is fine by
|
||||
running the full test suite in GitLab. To do that, after forking mat2 in GitLab,
|
||||
you need to go in Settings -> CI/CD -> Runner and there enable shared runners.
|
||||
|
||||
Mat2 also has unit tests (that are also run in the full test suite). You can run
|
||||
them with `python3 -m unittest discover -v`.
|
||||
|
||||
If you're fixing a bug or adding a new feature, please add tests accordingly,
|
||||
this will greatly improve the odds of your merge-request getting merged.
|
||||
|
||||
If you're adding a new fileformat, please add tests for:
|
||||
|
||||
1. Getting metadata
|
||||
2. Cleaning metadata
|
||||
3. Raising `ValueError` upon a corrupted file
|
||||
|
||||
Since mat2 is written in Python3, please conform as much as possible to the
|
||||
[pep8]( https://pep8.org/ ) style; except where it makes no sense of course.
|
||||
|
||||
# Doing a release
|
||||
|
||||
1. Update the [changelog](https://0xacab.org/jvoisin/mat2/blob/master/CHANGELOG.md)
|
||||
2. Update the version in the [mat2](https://0xacab.org/jvoisin/mat2/blob/master/mat2) file
|
||||
3. Update the version in the [setup.py](https://0xacab.org/jvoisin/mat2/blob/master/setup.py) file
|
||||
4. Commit the changelog, mat2 and setup.py files
|
||||
5. Create a tag with `git tag -s $VERSION`
|
||||
6. Push the tag with `git push --tags`
|
||||
1. Update the [changelog](https://github.com/jvoisin/mat2/blob/master/CHANGELOG.md)
|
||||
2. Update the version in the [mat2](https://github.com/jvoisin/mat2/blob/master/mat2) file
|
||||
3. Update the version in the [setup.py](https://github.com/jvoisin/mat2/blob/master/setup.py) file
|
||||
4. Update the version in the [pyproject.toml](https://github.com/jvoisin/mat2/blob/master/pyproject.toml) file
|
||||
5. Update the version and date in the [man page](https://github.com/jvoisin/mat2/blob/master/doc/mat2.1)
|
||||
6. Commit the modified files
|
||||
7. Create a tag with `git tag -s $VERSION`
|
||||
8. Push the commit with `git push origin master`
|
||||
9. Push the tag with `git push --tags`
|
||||
10. Download the gitlab archive of the release
|
||||
11. Diff it against the local copy
|
||||
12. If there is no difference, sign the archive with `gpg --armor --detach-sign mat2-$VERSION.tar.xz`
|
||||
13. Upload the signature on Gitlab's [tag page](https://github.com/jvoisin/mat2/tags) and add the changelog there
|
||||
14. Announce the release on the [mailing list](https://mailman.boum.org/listinfo/mat-dev)
|
||||
15. Sign'n'upload the new version on pypi with `python3 setup.py sdist bdist_wheel` then `twine upload -s dist/*`
|
||||
16. Do the secret release dance
|
||||
|
70
INSTALL.md
Normal file
@@ -0,0 +1,70 @@
|
||||
# Python ecosystem
|
||||
|
||||
If you feel like running arbitrary code downloaded over the
|
||||
internet (pypi doesn't support gpg signatures [anymore](https://github.com/pypa/python-packaging-user-guide/pull/466)),
|
||||
mat2 is [available on pypi](https://pypi.org/project/mat2/), and can be
|
||||
installed like this:
|
||||
|
||||
```
|
||||
pip3 install mat2
|
||||
```
|
||||
|
||||
# GNU/Linux
|
||||
|
||||
## Optional dependencies
|
||||
|
||||
When [bubblewrap](https://github.com/projectatomic/bubblewrap) is
|
||||
installed, mat2 uses it to sandbox any external processes it invokes.
|
||||
|
||||
## Arch Linux
|
||||
|
||||
Thanks to [kpcyrd](https://archlinux.org/packages/?maintainer=kpcyrd), there is a package available on
|
||||
[Arch linux's AUR](https://archlinux.org/packages/extra/any/mat2/).
|
||||
|
||||
## Debian
|
||||
|
||||
There is a package available in [Debian](https://packages.debian.org/search?keywords=mat2&searchon=names&section=all) and you can install mat2 with:
|
||||
|
||||
```
|
||||
apt install mat2
|
||||
```
|
||||
|
||||
## Fedora
|
||||
|
||||
Thanks to [atenart](https://ack.tf/), there is a package available on
|
||||
[Fedora's copr]( https://copr.fedorainfracloud.org/coprs/atenart/mat2/ ).
|
||||
|
||||
First you need to enable mat2's copr:
|
||||
|
||||
```
|
||||
dnf -y copr enable atenart/mat2
|
||||
```
|
||||
|
||||
Then you can install mat2:
|
||||
|
||||
```
|
||||
dnf -y install mat2
|
||||
```
|
||||
|
||||
## Gentoo
|
||||
|
||||
mat2 is available in the [torbrowser overlay](https://github.com/MeisterP/torbrowser-overlay).
|
||||
|
||||
|
||||
# OSX
|
||||
|
||||
## Homebrew
|
||||
|
||||
mat2 is [available on homebrew](https://formulae.brew.sh/formula/mat2):
|
||||
|
||||
```
|
||||
brew install mat2
|
||||
```
|
||||
|
||||
## MacPorts
|
||||
|
||||
mat2 is [available on MacPorts](https://ports.macports.org/port/mat2/):
|
||||
|
||||
```
|
||||
port install mat2
|
||||
```
|
93
README.md
@@ -1,92 +1 @@
|
||||
```
|
||||
_____ _____ _____ ___
|
||||
| | _ |_ _|_ | Keep you data,
|
||||
| | | | | | | | _| trash your meta!
|
||||
|_|_|_|__|__| |_| |___|
|
||||
|
||||
```
|
||||
|
||||
This software is currently in **beta**, please don't use it for anything
|
||||
critical.
|
||||
|
||||
# Metadata and privacy
|
||||
|
||||
Metadata consist of information that characterizes data.
|
||||
Metadata are used to provide documentation for data products.
|
||||
In essence, metadata answer who, what, when, where, why, and how about
|
||||
every facet of the data that are being documented.
|
||||
|
||||
Metadata within a file can tell a lot about you.
|
||||
Cameras record data about when a picture was taken and what
|
||||
camera was used. Office documents like PDF or Office automatically adds
|
||||
author and company information to documents and spreadsheets.
|
||||
Maybe you don't want to disclose those information on the web.
|
||||
|
||||
This is precisely the job of MAT2: getting rid, as much as possible, of
|
||||
metadata.
|
||||
|
||||
# Requirements
|
||||
|
||||
- `python3-mutagen` for audio support
|
||||
- `python3-gi-cairo` and `gir1.2-poppler-0.18` for PDF support
|
||||
- `gir1.2-gdkpixbuf-2.0` for images support
|
||||
- `libimage-exiftool-perl` for everything else
|
||||
|
||||
Please note that MAT2 requires at least Python3.5, meaning that it
|
||||
doesn't run on [Debian Jessie](https://packages.debian.org/jessie/python3),
|
||||
|
||||
# Running the test suite
|
||||
|
||||
```bash
|
||||
$ python3 -m unittest discover -v
|
||||
```
|
||||
|
||||
# Supported formats
|
||||
|
||||
```bash
|
||||
$ python3 ./mat2 -l
|
||||
```
|
||||
|
||||
# Related software
|
||||
|
||||
- The first iteration of [MAT](http://mat.boum.org)
|
||||
- [Exiftool](https://sno.phy.queensu.ca/~phil/exiftool/mat)
|
||||
- [pdf-redact-tools](https://github.com/firstlookmedia/pdf-redact-tools), that
|
||||
tries to deal with *printer dots* too.
|
||||
- [pdfparanoia](https://github.com/kanzure/pdfparanoia), that removes
|
||||
watermarks from PDF.
|
||||
|
||||
# Contact
|
||||
|
||||
If possible, use the [issues system](https://0xacab.org/jvoisin/mat2/issues).
|
||||
If you think that a more private contact is needed (eg. for reporting security issues),
|
||||
you can email Julien (jvoisin) Voisin at `julien.voisin+mat@dustri.org`,
|
||||
using the gpg key `9FCDEE9E1A381F311EA62A7404D041E8171901CC`.
|
||||
|
||||
# License
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Copyright 2018 Julien (jvoisin) Voisin <julien.voisin+mat2@dustri.org>
|
||||
|
||||
# Thanks
|
||||
|
||||
MAT2 wouldn't exist without:
|
||||
|
||||
- the [Google Summer of Code](https://summerofcode.withgoogle.com/);
|
||||
- the fine people from [Tails]( https://tails.boum.org);
|
||||
- friends
|
||||
|
||||
Many thanks to them!
|
||||
|
||||
# This repository is deprecated, please use https://github.com/jvoisin/mat2 instead
|
BIN
data/mat2.png
Before Width: | Height: | Size: 3.1 KiB After Width: | Height: | Size: 28 KiB |
649
data/mat2.svg
@@ -1,27 +1,630 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<svg xmlns="http://www.w3.org/2000/svg" version="1.0">
|
||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||
|
||||
<svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
inkscape:export-ydpi="384"
|
||||
inkscape:export-xdpi="384"
|
||||
inkscape:export-filename="mat2.png"
|
||||
width="128"
|
||||
height="128"
|
||||
id="svg11300"
|
||||
sodipodi:version="0.32"
|
||||
inkscape:version="0.92.2 2405546, 2018-03-11"
|
||||
sodipodi:docname="mat2.svg"
|
||||
inkscape:output_extension="org.inkscape.output.svg.inkscape"
|
||||
version="1.0"
|
||||
style="display:inline;enable-background:new"
|
||||
viewBox="0 0 128 128">
|
||||
<title
|
||||
id="title4162">Adwaita Icon Template</title>
|
||||
<defs
|
||||
id="defs3" />
|
||||
<sodipodi:namedview
|
||||
stroke="#ef2929"
|
||||
fill="#f57900"
|
||||
id="base"
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="0.25490196"
|
||||
inkscape:pageopacity="0.0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:zoom="4.0446508"
|
||||
inkscape:cx="99.116732"
|
||||
inkscape:cy="42.537095"
|
||||
inkscape:current-layer="layer1"
|
||||
showgrid="true"
|
||||
inkscape:grid-bbox="true"
|
||||
inkscape:document-units="px"
|
||||
inkscape:showpageshadow="false"
|
||||
inkscape:window-width="1920"
|
||||
inkscape:window-height="1021"
|
||||
inkscape:window-x="0"
|
||||
inkscape:window-y="22"
|
||||
width="400px"
|
||||
height="300px"
|
||||
inkscape:snap-nodes="true"
|
||||
inkscape:snap-bbox="false"
|
||||
objecttolerance="7"
|
||||
gridtolerance="12"
|
||||
guidetolerance="13"
|
||||
inkscape:window-maximized="1"
|
||||
inkscape:pagecheckerboard="false"
|
||||
showguides="true"
|
||||
inkscape:guide-bbox="true"
|
||||
inkscape:locked="false"
|
||||
inkscape:measure-start="0,0"
|
||||
inkscape:measure-end="0,0"
|
||||
inkscape:object-nodes="true"
|
||||
inkscape:bbox-nodes="true"
|
||||
inkscape:snap-global="true"
|
||||
inkscape:object-paths="true"
|
||||
inkscape:snap-intersection-paths="true"
|
||||
inkscape:snap-bbox-edge-midpoints="true"
|
||||
inkscape:snap-bbox-midpoints="true"
|
||||
showborder="false"
|
||||
inkscape:snap-center="true"
|
||||
inkscape:snap-object-midpoints="true"
|
||||
inkscape:snap-midpoints="true"
|
||||
inkscape:snap-smooth-nodes="true">
|
||||
<inkscape:grid
|
||||
type="xygrid"
|
||||
id="grid5883"
|
||||
spacingx="2"
|
||||
spacingy="2"
|
||||
enabled="true"
|
||||
visible="true"
|
||||
empspacing="4"
|
||||
originx="0"
|
||||
originy="0" />
|
||||
<sodipodi:guide
|
||||
position="64,8"
|
||||
orientation="0,1"
|
||||
id="guide1073"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="12,64"
|
||||
orientation="1,0"
|
||||
id="guide1075"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="64,104"
|
||||
orientation="0,1"
|
||||
id="guide1099"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="64,128"
|
||||
orientation="0,1"
|
||||
id="guide993"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="104,64"
|
||||
orientation="1,0"
|
||||
id="guide995"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="9.2651362e-08,64"
|
||||
orientation="1,0"
|
||||
id="guide867"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="120,64"
|
||||
orientation="1,0"
|
||||
id="guide869"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="64,116"
|
||||
orientation="0,1"
|
||||
id="guide871"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<inkscape:grid
|
||||
type="xygrid"
|
||||
id="grid873"
|
||||
spacingx="1"
|
||||
spacingy="1"
|
||||
empspacing="8"
|
||||
color="#000000"
|
||||
opacity="0.49019608"
|
||||
empcolor="#000000"
|
||||
empopacity="0.08627451"
|
||||
dotted="true" />
|
||||
<sodipodi:guide
|
||||
position="24,64"
|
||||
orientation="1,0"
|
||||
id="guide877"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="116,64"
|
||||
orientation="1,0"
|
||||
id="guide879"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="64,120"
|
||||
orientation="0,1"
|
||||
id="guide881"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="64,12"
|
||||
orientation="0,1"
|
||||
id="guide883"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="8,64"
|
||||
orientation="1,0"
|
||||
id="guide885"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="128,64"
|
||||
orientation="1,0"
|
||||
id="guide887"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="64,0"
|
||||
orientation="0,1"
|
||||
id="guide897"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="64,24"
|
||||
orientation="0,1"
|
||||
id="guide899"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="256,256"
|
||||
orientation="-0.70710678,0.70710678"
|
||||
id="guide950"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="64,64"
|
||||
orientation="0.70710678,0.70710678"
|
||||
id="guide952"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
</sodipodi:namedview>
|
||||
<metadata
|
||||
id="metadata4">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:creator>
|
||||
<cc:Agent>
|
||||
<dc:title>GNOME Design Team</dc:title>
|
||||
</cc:Agent>
|
||||
</dc:creator>
|
||||
<dc:source />
|
||||
<cc:license
|
||||
rdf:resource="http://creativecommons.org/licenses/by-sa/4.0/" />
|
||||
<dc:title>Adwaita Icon Template</dc:title>
|
||||
<dc:subject>
|
||||
<rdf:Bag />
|
||||
</dc:subject>
|
||||
<dc:date />
|
||||
<dc:rights>
|
||||
<cc:Agent>
|
||||
<dc:title />
|
||||
</cc:Agent>
|
||||
</dc:rights>
|
||||
<dc:publisher>
|
||||
<cc:Agent>
|
||||
<dc:title />
|
||||
</cc:Agent>
|
||||
</dc:publisher>
|
||||
<dc:identifier />
|
||||
<dc:relation />
|
||||
<dc:language />
|
||||
<dc:coverage />
|
||||
<dc:description />
|
||||
<dc:contributor>
|
||||
<cc:Agent>
|
||||
<dc:title />
|
||||
</cc:Agent>
|
||||
</dc:contributor>
|
||||
</cc:Work>
|
||||
<cc:License
|
||||
rdf:about="http://creativecommons.org/licenses/by-sa/4.0/">
|
||||
<cc:permits
|
||||
rdf:resource="http://creativecommons.org/ns#Reproduction" />
|
||||
<cc:permits
|
||||
rdf:resource="http://creativecommons.org/ns#Distribution" />
|
||||
<cc:requires
|
||||
rdf:resource="http://creativecommons.org/ns#Notice" />
|
||||
<cc:requires
|
||||
rdf:resource="http://creativecommons.org/ns#Attribution" />
|
||||
<cc:permits
|
||||
rdf:resource="http://creativecommons.org/ns#DerivativeWorks" />
|
||||
<cc:requires
|
||||
rdf:resource="http://creativecommons.org/ns#ShareAlike" />
|
||||
</cc:License>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<g
|
||||
fill="#27628a"
|
||||
stroke="none">
|
||||
<path
|
||||
d="M0 5120 l0 -5120 3000 0 3000 0 0 5120 0 5120 -3000 0 -3000 0 0 -5120z" />
|
||||
</g>
|
||||
<g
|
||||
fill="#7fcae7"
|
||||
stroke="none">
|
||||
<path
|
||||
d="M 0,5120 V 0 h 3000 3000 v 5120 5120 H 3000 0 Z m 3041,3965 c 257.1951,-231.2173 270.8768,-244.4494 1132,-978 100.0843,-559.7796 173.9788,-986.5359 279,-1586 -165.7863,-405.0485 -178.8353,-430.8722 -292,-721 650.6072,-1421.1218 667.3936,-1452.2872 1190,-2550 -2109.4504,-0.035 -2130.9695,-0.025 -4468.86586,0.037 72.33788,69.7996 74.76441,71.6861 148.86586,140.963 -129.0483,91.5488 -134.68166,93.6858 -367,225 175.86245,383.2532 323.97381,668.4741 527,1073 35.6121,292.0899 72.3384,584.0406 109,876 5.074,391.6586 9.0034,783.3294 13,1175 314.3202,597.9247 654.4179,1182.5892 964,1783 88.7542,312.5107 121.9361,512.8332 194,862 95.2778,168.6736 102.3771,181.1881 273,473 113.1881,-286.567 245.9452,-613.0146 298,-773 z" />
|
||||
</g>
|
||||
<g
|
||||
fill="#c0dede"
|
||||
stroke="none">
|
||||
<path
|
||||
d="M0 1625 l0 -1625 3000 0 3000 0 0 1625 0 1625 -3000 0 -3000 0 0 -1625z" />
|
||||
</g>
|
||||
<g
|
||||
fill="#ffffff"
|
||||
stroke="none">
|
||||
<path
|
||||
d="M 881.01695,3249.9206 C 1286.0459,3091.4742 1546.5278,3035.4925 1889,2924 c 129.95,-482.4131 173.4726,-686.2614 331,-1262 132.796,95.3371 216.2935,142.9991 359,242 116.2556,-360.389 199.5642,-636.2515 320,-1025 108.0281,-100.84978 136.3812,-131.67871 296,-299 10,0 254,309 487,616 83.6789,470.193 92.832,516.3155 215,1032 422.9371,260.0129 459.4089,278.2641 878,528 0,69.3333 0,138.6667 0,208 253.7343,134.9322 263.2776,139.2776 570,286 H 3107 c -2226,0 -2219.49894,-0.1145 -2225.98305,-0.079 z" />
|
||||
id="layer1"
|
||||
inkscape:label="Icon"
|
||||
inkscape:groupmode="layer"
|
||||
style="display:inline"
|
||||
transform="translate(0,-172)">
|
||||
<g
|
||||
inkscape:groupmode="layer"
|
||||
id="layer2"
|
||||
inkscape:label="baseplate"
|
||||
style="display:none">
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.33333349px;line-height:125%;font-family:Cantarell;-inkscape-font-specification:'Cantarell, Normal';text-align:start;writing-mode:lr-tb;text-anchor:start;display:inline;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.33264872;enable-background:new"
|
||||
x="7.9499588"
|
||||
y="148.65199"
|
||||
id="context"
|
||||
inkscape:label="context"><tspan
|
||||
sodipodi:role="line"
|
||||
id="tspan2716"
|
||||
x="7.9499588"
|
||||
y="148.65199"
|
||||
style="font-size:5.33333349px;stroke-width:0.33264872">apps</tspan></text>
|
||||
<text
|
||||
inkscape:label="icon-name"
|
||||
id="text3021"
|
||||
y="157.23398"
|
||||
x="7.7533054"
|
||||
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.33333349px;line-height:125%;font-family:Cantarell;-inkscape-font-specification:'Cantarell, Bold';text-align:start;writing-mode:lr-tb;text-anchor:start;display:inline;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.33264872;enable-background:new"
|
||||
xml:space="preserve"><tspan
|
||||
y="157.23398"
|
||||
x="7.7533054"
|
||||
id="tspan3023"
|
||||
sodipodi:role="line"
|
||||
style="font-size:5.33333349px;stroke-width:0.33264872">org.gnome.</tspan></text>
|
||||
<g
|
||||
style="display:inline;fill:#000000;enable-background:new"
|
||||
transform="matrix(7.9911709,0,0,8.0036407,-167.7909,-4846.0776)"
|
||||
id="g12027"
|
||||
inkscape:export-xdpi="12"
|
||||
inkscape:export-ydpi="12" />
|
||||
<rect
|
||||
style="display:inline;overflow:visible;visibility:visible;fill:#f0f0f0;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.5;marker:none;enable-background:accumulate"
|
||||
id="rect13805"
|
||||
width="128"
|
||||
height="128"
|
||||
x="9.2651362e-08"
|
||||
y="172"
|
||||
inkscape:label="512x512" />
|
||||
<g
|
||||
id="g883"
|
||||
style="fill:none;fill-opacity:0.25098039;stroke:#a579b3;stroke-opacity:1"
|
||||
transform="translate(-24,24)" />
|
||||
<g
|
||||
id="g900"
|
||||
style="fill:none;fill-opacity:0.25098039;stroke:#a579b3;stroke-opacity:1"
|
||||
transform="translate(-24,24)" />
|
||||
<g
|
||||
id="g1168"
|
||||
transform="matrix(0.25,0,0,0.25,6.9488522e-8,225)">
|
||||
<circle
|
||||
cx="256"
|
||||
cy="44"
|
||||
r="240"
|
||||
id="path1142"
|
||||
style="opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal" />
|
||||
<rect
|
||||
ry="32"
|
||||
rx="32"
|
||||
y="-180"
|
||||
x="96"
|
||||
height="448"
|
||||
width="319.99979"
|
||||
id="rect1110"
|
||||
style="opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal" />
|
||||
<rect
|
||||
ry="32"
|
||||
rx="32"
|
||||
y="-164"
|
||||
x="48"
|
||||
height="416"
|
||||
width="416"
|
||||
id="rect1110-8"
|
||||
style="display:inline;opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal;enable-background:new" />
|
||||
<rect
|
||||
ry="32"
|
||||
rx="32"
|
||||
y="-116"
|
||||
x="32"
|
||||
height="320"
|
||||
width="448"
|
||||
id="rect1110-8-9"
|
||||
style="display:inline;opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal;enable-background:new" />
|
||||
</g>
|
||||
</g>
|
||||
<g
|
||||
inkscape:groupmode="layer"
|
||||
id="layer9"
|
||||
inkscape:label="hires"
|
||||
style="display:none" />
|
||||
<g
|
||||
id="g944"
|
||||
transform="matrix(1,0,0,0.93868822,0,14.545966)">
|
||||
<path
|
||||
style="fill:#99c1f1;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.41013032;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="m 50.53899,195.25817 6.396029,-11.43484 1.082405,-0.87215 4.821622,-10.46578 0.885604,-0.38763 2.558412,4.74837 2.755213,9.59364 1.672808,1.35667 3.542417,-0.87215 5.707227,12.59771 12.988859,9.59364 3.050415,3.87621 v 2.71335 l -16.334476,-1.25977 -7.084833,1.45359 -4.428021,-0.38763 -7.084833,0.29072 -11.414452,-0.58143 -3.640817,0.96905 -9.052843,-1.64739 -2.066409,0.0969 -1.476008,-0.48452 1.377607,-1.45358 1.869609,-1.06596 6.002428,-11.04722 1.279206,0.48453 5.412025,-6.49267 z"
|
||||
id="path3455"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#241f31;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 49.358184,215.31759 -3.444016,0.9206 -9.003641,-1.74429 -1.918809,0.24226 -1.623608,-0.58143 1.574407,-1.50204 1.722008,-0.96905 5.953228,-11.09567 1.279205,0.53298 5.510426,-6.54112 0.344401,0.29072 -4.969223,10.27197 2.214011,1.93811 -0.246001,4.45765 z"
|
||||
id="path3459"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#241f31;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 50.438601,195.22912 6.470906,-11.5803 1.113274,-0.6167 4.870575,-10.62099 0.904535,-0.41113 -0.417479,3.3576 0.626218,0.89079 0.834954,15.89722 1.391594,3.70021 -3.687722,5.34476 0.208739,1.37044 -0.347898,5.68737 1.87865,3.28908 7.375442,2.19272 1.252433,2.19272 -0.487057,0.13704 -4.244358,-0.54818 -6.540486,0.41114 -2.435287,-2.19272 -0.626216,-4.24839 -2.087389,-6.16703 -4.035619,-3.42612 -2.087388,-4.38544"
|
||||
id="path3461"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 32.069579,218.11563 c -0.06958,-0.27409 0.695796,-1.23341 0.695796,-1.23341 l 2.783185,-0.0685 1.739491,2.26124 4.661836,5.13919 0.139158,1.57602 -4.174778,5.96145 -0.487057,6.16703 -2.922344,2.26124 -0.06958,1.57601 h -1.113274 l -1.322013,-3.08351 2.017809,-14.86938 z"
|
||||
id="path3400"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 48.83827,222.43255 1.600331,-3.01499 -0.695796,-0.75375 -5.635951,-1.16488 -3.200663,0.82227 -0.06958,1.50749 1.53075,0.75375 1.461174,2.67237 -0.208739,1.71307 1.739489,1.02783 2.296129,-0.54818 z"
|
||||
id="path3402"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 51.203977,217.70449 1.113274,-0.68522 2.365707,1.02784 1.322013,2.67237 -2.226548,2.26125 -1.322013,-0.82227 -1.322013,-0.61671 0.834956,-1.71306 z"
|
||||
id="path3404"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 43.758957,226.61242 1.948228,0.68522 0.417479,1.91863 -0.626216,1.30193 -1.182854,0.34261 -1.113275,1.02784 -0.765376,3.63169 0.626218,3.01499 -1.252435,0.68522 -0.487057,-0.41113 -0.278319,-1.5075 -1.80907,-1.37045 -0.765376,-3.49464 3.618141,-3.42613 1.669912,-2.67237"
|
||||
id="path3406"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 50.57776,223.25481 0.13916,0.68523 -2.783187,3.83726 0.06958,1.64454 -0.626218,1.50749 -1.60033,1.43897 -0.06958,0.75375 1.600333,1.91863 1.182854,3.08351 0.974114,0.68523 1.669911,-2.80942 -0.278318,-3.22056 3.966039,-3.3576 0.695796,-1.09636 -3.270243,-4.45396 z"
|
||||
id="path3408"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 51.839954,236.39856 -0.834826,1.58948 0.166966,1.26061 1.057445,1.97315 0.500896,-0.32886 0.389584,-1.7539 1.447031,-1.151 2.337512,-4.0559 -0.22262,-1.04138 -1.947927,-1.69909 -2.114892,1.31542 0.278276,3.39819 z"
|
||||
id="path3410"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 57.593778,229.84236 -1.043694,1.09636 0.765375,0.89079 1.043695,-0.20556 v -1.43898 z"
|
||||
id="path3412"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 59.344793,218.25267 -0.765376,2.19272 -0.695796,0.27409 -0.695796,1.91863 -2.226548,2.26124 2.574446,3.56317 h 1.182854 l 0.487057,0.75375 0.626217,1.09636 1.948229,1.30193 2.922346,-0.6167 1.53075,-2.26125 -1.043694,-3.3576 -1.043693,-1.64454 1.322011,-2.60385 -0.904535,-1.37045 -2.226548,0.0685 z"
|
||||
id="path3416"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 72.150522,238.17554 -0.518261,1.78635 1.036524,2.16915 1.684349,-2.04155 -0.647826,-2.16915 z"
|
||||
id="path3418"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 66.789813,223.66595 1.600333,-0.75375 1.739489,-4.11135 2.922346,0.75375 1.322013,0.41114 0.139159,6.7152 -1.461172,1.02784 -2.226548,4.17987 -0.834956,-0.41114 -0.626216,0.95932 -2.574448,-0.61671 0.904537,-3.08351 z"
|
||||
id="path3422"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 77.505077,218.59529 1.182854,-0.20557 2.435287,1.30193 -0.974115,1.02783 -2.087389,3.63169 -1.391593,0.0685 -1.113274,-0.61671 1.043695,-2.19271 z"
|
||||
id="path3426"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 73.539038,231.06638 1.043695,-1.30193 1.043694,-2.80942 4.522676,1.71306 -0.974115,2.87795 -1.94823,-0.41114 -1.80907,1.09636 z"
|
||||
id="path3428"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 78.200873,225.6531 7.932079,-7.94861 3.339822,1.09636 0.974115,0.13705 1.600331,-1.02784 3.339822,0.0685 -5.079314,12.81371 -3.200663,-1.98715 0.139161,-1.16489 -0.695798,-0.6167 -0.208737,-1.16488 -1.043696,0.27409 -3.200663,2.39829 z"
|
||||
id="path3430"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 81.401536,230.99786 c 0,-0.2741 2.156968,-1.98716 2.156968,-1.98716 l 2.017811,1.30193 -0.904535,2.32976 -1.182855,0.75375 z"
|
||||
id="path3432"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 81.679855,237.8501 0.765375,-1.91863 0.208739,-1.2334 2.156969,0.20557 2.156968,-2.87795 3.409403,1.02784 -0.904535,2.80942 -0.904535,0.34261 -0.626218,2.80943 1.043694,4.72805 -0.904535,1.09636 -1.80907,-2.19272 -0.626217,-1.37045 z"
|
||||
id="path3434"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 78.131294,238.60385 0.626216,3.08351 -0.626216,3.22056 0.765375,0.95931 -0.626216,5.68737 2.504866,2.32976 1.87865,-0.47965 0.417478,-3.35761 1.669911,-0.0685 3.757301,-1.8501 -0.20874,-1.98716 -2.226548,-0.20556 -1.182854,-3.01499 -3.200662,-2.05568 -1.252434,-2.39828 z"
|
||||
id="path3436"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 84.532619,251.41755 -0.278318,1.43898 -0.695797,0.6167 1.322013,2.67238 2.365709,-0.20557 1.53075,-2.94647 -2.365707,-1.98715 z"
|
||||
id="path3438"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 64.993183,249.51154 -1.14959,2.51583 0.766392,1.69818 2.618509,0.25159 0.702526,1.19502 1.021857,2.39003 -0.574794,2.32714 3.89583,1.88688 0.95799,-1.06923 0.510928,-4.59139 -4.023561,-2.70451 -0.127732,-4.21402 z"
|
||||
id="path3440"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 72.912822,251.00642 h 1.391592 l 2.574446,0.75375 1.391593,1.98715 1.461172,1.30193 -0.139159,3.42612 -3.409402,1.57602 -0.974115,-1.85011 0.626217,-3.3576 -3.270243,-1.85011 z"
|
||||
id="path3442"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 72.147446,264.77944 1.80907,-1.98715 3.339822,-1.85011 1.322013,-0.0685 4.661835,-3.63169 1.391594,0.34261 0.556637,4.52248 -3.200664,4.04283 -2.852765,-0.82227 -1.80907,0.54818 -0.765376,1.43897 -2.087389,0.68522 z"
|
||||
id="path3444"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 75.06979,272.93361 0.765376,-1.30192 1.252433,-0.41114 0.904535,-2.87794 1.94823,-0.61671 0.556637,2.60386 -3.339822,6.0985 -1.391593,-0.0685 z"
|
||||
id="path3446"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 71.451649,268.20556 -1.252433,1.85011 2.504867,1.98715 0.765376,0.82227 1.73949,-2.39829 -2.296127,-2.80942 -1.461173,0.27409 z"
|
||||
id="path3448"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 62.24531,254.0948 1.461172,1.02784 1.948229,0.54818 0.487058,1.64454 -1.461173,2.67237 -0.06958,1.78159 -1.669911,1.85011 -1.252433,-2.05568 0.487057,-2.80942 -1.391593,-0.34261 -0.904535,-2.80942 z"
|
||||
id="path3450"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 47.585836,246.55246 -0.695796,3.70021 -0.139159,1.37045 1.87865,0.68523 1.391592,0.95931 1.809071,-1.64454 -0.417478,-0.95931 z"
|
||||
id="path3452"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 54.682958,247.78586 -1.043694,1.02784 0.208739,1.98715 1.600331,0.89079 0.626217,-0.47965 0.06958,-2.26125 z"
|
||||
id="path3454"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 48.629531,258.95503 4.800994,-6.16703 3.409402,0.82227 0.556637,1.78159 3.131083,4.79657 -1.669911,5.82441 -3.200663,-1.37045 -0.417478,-3.49464 -2.087388,1.30192 z"
|
||||
id="path3456"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 45.915924,252.71948 -0.487056,1.98715 1.60033,1.57602 1.461174,-0.20557 -0.347899,-2.19272 z"
|
||||
id="path3458"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 67.555189,261.6274 -1.80907,2.80943 -2.435287,8.42826 2.783185,3.76874 1.461172,-0.0685 1.113274,-2.12419 1.043696,-0.20557 0.487057,-1.09636 -1.043694,-4.45396 1.182853,-4.31692 z"
|
||||
id="path3460"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 58.718577,267.79443 1.600331,-1.23341 2.017809,1.71306 -0.904535,1.85011 z"
|
||||
id="path3462"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 58.509838,276.49678 2.156968,-4.591 1.391593,-0.27409 0.834955,1.50749 -2.017809,5.13919 z"
|
||||
id="path3464"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 71.242911,274.02997 1.391592,0.20557 1.043694,3.01499 2.01781,0.68522 1.530751,1.57602 -0.904535,2.87795 -2.365707,2.32976 -0.139159,3.56317 -1.322013,1.98715 -2.504867,-1.85011 -0.278318,-2.67237 -1.530752,-1.78159 -1.113274,-3.08351 3.61814,-4.17987 z"
|
||||
id="path3466"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 62.893354,276.5653 3.270244,1.16489 0.06958,3.70021 -0.556637,0.68523 0.974115,3.70021 1.252433,1.64454 0.06958,3.08351 -2.017809,1.37045 -2.574447,8.08566 -2.574447,-1.30193 -1.948229,-9.79872 z"
|
||||
id="path3468"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 58.440258,283.5546 h 0.556637 l 0.417478,0.95931 -0.208739,1.30193 -1.461172,0.13704 z"
|
||||
id="path3472"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 56.700767,279.16916 -1.113274,0.95931 0.834956,2.80943 1.600331,0.20556 0.487058,-2.05567 -0.695796,-1.91863 z"
|
||||
id="path3474"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 53.152207,272.17987 0.139159,5.13918 1.87865,1.23341 0.834955,-0.54818 0.904535,-3.63169 1.530752,-1.57602 -1.669911,-3.97431 -3.548561,3.08352 z"
|
||||
id="path3476"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 45.915924,258.33832 -0.208739,3.83726 -4.731414,3.97431 1.948229,2.80942 8.488716,0.82227 0.417478,1.98715 1.043694,-0.75375 0.487057,-2.19272 1.182854,-1.64454 -0.417478,-1.09635 -1.87865,-2.60386 -3.757299,-1.37045 -1.461174,-3.22056 z"
|
||||
id="path3480"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 40.279975,263.68308 1.669912,0.6167 3.061502,-6.37259 -0.904535,-5.61884 -2.504867,-0.34262 -1.391592,-1.2334 2.156968,-7.606 -2.087388,-4.45396 -3.409402,1.57602 -0.834956,3.42612 -1.87865,0.20557 -0.347898,2.1242 1.530752,1.64454 h 1.322013 l 0.626217,3.90578 2.296127,5.61884 -0.347898,2.19272 z"
|
||||
id="path3482"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 66.531337,247.61066 -0.590018,-0.31657 -0.420783,-1.71262 0.427793,-0.66945 1.306823,-1.13114 2.316342,-1.38746 1.06612,0.23465 -0.01701,2.21105 -2.36166,3.35302 z"
|
||||
id="path4284"
|
||||
inkscape:connector-curvature="0"
|
||||
inkscape:transform-center-x="4.9927099"
|
||||
inkscape:transform-center-y="-9.3161687" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 72.373733,232.22199 -0.815102,1.03206 4.017286,4.12827 1.571981,0.17201 1.339096,-0.86006 0.931544,0.63071 2.387083,-2.98152 -2.794634,-0.91739 -3.027519,0.22934 z"
|
||||
id="path3601"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 57.407878,237.1102 -1.301737,2.34289 -1.301738,0.61888 -0.17955,1.45878 -4.488748,1.54719 -0.403989,1.50299 0.314213,0.30944 1.032412,0.0884 v 1.41457 l 1.660839,1.50299 2.154598,-1.94504 1.571064,0.35364 2.738136,-1.94504 -1.436399,-2.56392 0.987525,-3.44803 -0.583538,-1.37037 z"
|
||||
id="path3603"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 62.104217,246.96003 5.843936,-6.55723 0.659867,-2.66044 2.221783,-0.40757 -0.386451,-3.39556 -2.000988,-0.60704 -6.246127,-0.36572 -2.624948,2.5137 1.519708,2.75102 -0.347742,5.51876 z"
|
||||
id="path3605"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 71.024647,249.63275 5.822153,1.31875 1.047988,-3.89891 -1.280874,-1.43343 0.523995,-6.02038 -3.551515,5.275 0.34933,2.06413 -2.037753,0.80272 -1.164431,0.45869 z"
|
||||
id="path3607"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 59.099222,247.24427 -2.095974,1.72011 -0.05822,1.60543 0.465772,1.72011 1.455539,0.97473 -0.407551,0.97473 2.328861,-0.34402 2.27064,-2.86685 -1.571981,-0.57337 -0.640437,-2.86685 -1.51376,-0.40136 z"
|
||||
id="path3609"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 44.078067,234.34346 0.291107,4.47228 -1.863089,1.43342 2.095976,3.72691 2.037753,0.0573 2.27064,-3.55489 -2.969297,-4.98831 z"
|
||||
id="path3611"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 44.660282,245.46683 -3.318627,4.30027 1.339096,1.26141 2.561747,-0.28668 1.222652,-3.15354 z"
|
||||
id="path3613"
|
||||
inkscape:connector-curvature="0" />
|
||||
</g>
|
||||
</g>
|
||||
</svg>
|
||||
|
Before Width: | Height: | Size: 1.7 KiB After Width: | Height: | Size: 34 KiB |
51
doc/comparison_to_others.md
Normal file
@@ -0,0 +1,51 @@
|
||||
# Exiftool
|
||||
|
||||
mat2 is in fact using exiftool to extract metadata from files,
|
||||
but not to remove them. The previous iteration of mat2, MAT,
|
||||
was using exiftool to remove metadata, which led to several cases where
|
||||
they weren't correctly removed, if at all.
|
||||
For example, [Exiftool's documentation](https://www.sno.phy.queensu.ca/~phil/exiftool/TagNames/PDF.html)
|
||||
states the following with regard to PDF:
|
||||
|
||||
> All metadata edits are reversible. While this would normally be considered an
|
||||
advantage, it is a potential security problem because old information is never
|
||||
actually deleted from the file.
|
||||
|
||||
To remove metadata, mat2 usually re-renders the file completely, eliminating
|
||||
all possible original metadata. See the `implementation_notes.md` file for
|
||||
details.
|
||||
|
||||
|
||||
# jpegoptim, optipng, …
|
||||
|
||||
While designed to reduce as much as possible the size of pictures,
|
||||
these tools can also be used to remove metadata. They usually have excellent
|
||||
support for a single picture format, and can be used in place of mat2 for them.
|
||||
|
||||
|
||||
# PDF Redact Tools
|
||||
|
||||
[PDF Redact Tools](https://github.com/firstlookmedia/pdf-redact-tools) is
|
||||
software developed by the people from [First Look
|
||||
Media](https://firstlook.media/), the entity behind, amongst other things,
|
||||
[The Intercept](https://theintercept.com/).
|
||||
|
||||
The tool uses roughly the same approach as mat2 to deal with PDF,
|
||||
which is unfortunately the only fileformat that it does support.
|
||||
It's interesting to note that it has counter-measures against
|
||||
[yellow dots](https://en.wikipedia.org/wiki/Machine_Identification_Code),
|
||||
a capacity that mat2 doesn't have.
|
||||
|
||||
|
||||
# Exiv2
|
||||
|
||||
[Exiv2](https://www.exiv2.org/) was considered for mat2,
|
||||
but it currently misses a lot of metadata.
|
||||
|
||||
|
||||
# Other non-open-source software/online services
|
||||
|
||||
There is a lot of closed-source software and many online services claiming to remove
|
||||
metadata from your files, but since there is no way to actually verify that
|
||||
they're effectively removing them, let alone not adding unique markers, they
|
||||
shouldn't be used.
|
@@ -4,31 +4,57 @@ Implementation notes
|
||||
Lightweight cleaning mode
|
||||
-------------------------
|
||||
|
||||
Due to *popular* request, MAT2 is providing a *lightweight* cleaning mode,
|
||||
Due to *popular* request, mat2 is providing a *lightweight* cleaning mode,
|
||||
that only cleans the superficial metadata of your file, but not
|
||||
the ones that might be in **embeded** resources. Like for example,
|
||||
the ones that might be in **embedded** resources. Like for example,
|
||||
images in a PDF or an office document.
|
||||
|
||||
Revisions handling
|
||||
------------------
|
||||
|
||||
Revisions are handled according to the principle of least astonishment: they
|
||||
are entirely removed.
|
||||
|
||||
- Either the users aren't aware of the revisions, and thus they should be
|
||||
deleted. For example journalists that are editing a document to erase
|
||||
mentions of sources.
|
||||
|
||||
- Or they are aware of it, and will likely not expect mat2 to be able to keep
|
||||
the revisions, that are basically traces about how, when and who edited the
|
||||
document.
|
||||
|
||||
|
||||
Race conditions
|
||||
---------------
|
||||
|
||||
MAT2 does its very best to avoid crashing at runtime. This is why it's checking
|
||||
if the file is valid __at parser creation__. MAT2 doesn't take any measure to
|
||||
mat2 does its very best to avoid crashing at runtime. This is why it's checking
|
||||
if the file is valid __at parser creation__. mat2 doesn't take any measure to
|
||||
ensure that the file is not changed between the time the parser is
|
||||
instantiated, and the call to clean or show the metadata.
|
||||
|
||||
Symlink attacks
|
||||
---------------
|
||||
|
||||
MAT2 output predictable filenames (like yourfile.jpg.cleaned).
|
||||
mat2 outputs predictable filenames (like yourfile.cleaned.jpg).
|
||||
This may lead to symlink attacks. Please check if your OS protects
|
||||
against them.
|
||||
|
||||
Archives handling
|
||||
-----------------
|
||||
|
||||
MAT2 doesn't support archives yet, because we haven't found an usable way to ask the user
|
||||
what to do when a non-supported files are encountered.
|
||||
By default, when cleaning a non-supported file format in an archive,
|
||||
mat2 will abort with a detailed error message.
|
||||
While strongly discouraged, it's possible to override this behaviour to force
|
||||
the exclusion, or inclusion of unknown files into the cleaned archive.
|
||||
|
||||
While Python's [zipfile](https://docs.python.org/3/library/zipfile.html) module
|
||||
provides a *safe* way to extract members of a zip archive, the
|
||||
[tarfile](https://docs.python.org/3/library/tarfile.html) one doesn't,
|
||||
meaning that it's up to mat2 to implement safety checks. Currently,
|
||||
it defends against path-traversal, both relative and absolute,
|
||||
symlink-related attacks, setuid/setgid attacks, duplicate members, block and
|
||||
char devices, … but there might still be dragons lurking there.
|
||||
|
||||
|
||||
PDF handling
|
||||
------------
|
||||
@@ -39,10 +65,10 @@ didn't remove any *deep metadata*, like the ones in embedded pictures. This was
|
||||
one of the reasons MAT was abandoned: the absence of a satisfying solution to
|
||||
handle PDF. But apparently, people are ok with [pdf redact
|
||||
tools](https://github.com/firstlookmedia/pdf-redact-tools), that simply
|
||||
transform the PDF into images. So this is what's MAT2 is doing too.
|
||||
transform the PDF into images. So this is what mat2 is doing too.
|
||||
|
||||
Of course, it would be possible to detect images in a PDF file, and process them
|
||||
with MAT2, but since a PDF can contain a lot of things, like images, videos,
|
||||
with mat2, but since a PDF can contain a lot of things, like images, videos,
|
||||
javascript, pdf, blobs, … this is the easiest and safest way to clean them.
|
||||
|
||||
Images handling
|
||||
@@ -51,3 +77,11 @@ Images handling
|
||||
When possible, images are handled like PDF: rendered on a surface, then saved
|
||||
to the filesystem. This ensures that every metadata is removed.
|
||||
|
||||
XML attacks
|
||||
-----------
|
||||
|
||||
Since our threat model conveniently excludes files crafted to specifically
|
||||
bypass mat2, fileformats containing harmful XML are out of our scope.
|
||||
But since mat2 is using [etree](https://docs.python.org/3/library/xml.html#xml-vulnerabilities)
|
||||
to process XML, it's "only" vulnerable to DoS, and not memory corruption:
|
||||
odds are that the user will notice that the cleaning didn't succeed.
|
||||
|
99
doc/mat2.1
Normal file
@@ -0,0 +1,99 @@
|
||||
.TH mat2 "1" "January 2025" "mat2 0.13.5" "User Commands"
|
||||
|
||||
.SH NAME
|
||||
mat2 \- the metadata anonymisation toolkit 2
|
||||
|
||||
.SH SYNOPSIS
|
||||
\fBmat2\fR [\-h] [\-v] [\-l] [\-V] [-s | -L] [\fIfiles\fR [\fIfiles ...\fR]]
|
||||
|
||||
.SH DESCRIPTION
|
||||
.B mat2
|
||||
removes metadata from various fileformats. It supports a wide variety of file
|
||||
formats, audio, office, images, …
|
||||
|
||||
Careful, mat2 does not clean files in-place, instead, it will produce a file with the word
|
||||
"cleaned" between the filename and its extension, for example "filename.cleaned.png"
|
||||
for a file named "filename.png".
|
||||
|
||||
.SH OPTIONS
|
||||
.SS "positional arguments:"
|
||||
.TP
|
||||
\fBfiles\fR
|
||||
the files to process
|
||||
.SS "optional arguments:"
|
||||
.TP
|
||||
\fB\-h\fR, \fB\-\-help\fR
|
||||
show this help message and exit
|
||||
.TP
|
||||
\fB\-v\fR, \fB\-\-version\fR
|
||||
show program's version number and exit
|
||||
.TP
|
||||
\fB\-l\fR, \fB\-\-list\fR
|
||||
list all supported fileformats
|
||||
.TP
|
||||
\fB\-\-check\-dependencies\fR
|
||||
check if mat2 has all the dependencies it needs
|
||||
.TP
|
||||
\fB\-V\fR, \fB\-\-verbose\fR
|
||||
show more verbose status information
|
||||
.TP
|
||||
\fB\-\-unknown-members\fR \fIpolicy\fR
|
||||
how to handle unknown members of archive-style files (policy should be one of: abort, omit, keep)
|
||||
.TP
|
||||
\fB\-s\fR, \fB\-\-show\fR
|
||||
list harmful metadata detectable by mat2 without removing them
|
||||
.TP
|
||||
\fB\-L\fR, \fB\-\-lightweight\fR
|
||||
remove SOME metadata
|
||||
.TP
|
||||
\fB\-\-no\-sandbox\fR
|
||||
disable bubblewrap's sandboxing
|
||||
.TP
|
||||
\fB\-\-inplace\fR
|
||||
clean in place, without backup
|
||||
|
||||
.SH EXAMPLES
|
||||
To remove all the metadata from a PDF file:
|
||||
.PP
|
||||
.nf
|
||||
.RS
|
||||
mat2 ./myfile.pdf
|
||||
.RE
|
||||
.fi
|
||||
.PP
|
||||
|
||||
.SH NOTES ABOUT METADATA
|
||||
|
||||
While mat2 is doing its very best to display metadata when the --show flag is
|
||||
passed, it doesn't mean that a file is clean from any metadata if mat2 doesn't
|
||||
show any. There is no reliable way to detect every single possible metadata for
|
||||
complex file formats.
|
||||
.PP
|
||||
This is why you shouldn't rely on metadata's presence to decide if your file must
|
||||
be cleaned or not.
|
||||
.PP
|
||||
Moreover, mat2 goes to great lengths to make sure that as much metadata as
|
||||
possible are removed. This might sometimes result in a loss of quality of the
|
||||
processed files. For example, textual based pdf file converted into image based
|
||||
one means that it'll be no longer possible to select text in them. If you're
|
||||
experiencing this, you might want to give the lightweight cleaning mode a try,
|
||||
but keep in mind by doing so, some metadata \fBwon't be cleaned\fR.
|
||||
|
||||
|
||||
.SH BUGS
|
||||
|
||||
While mat2 does its very best to remove every single metadata,
|
||||
it's still in beta, and \fBsome\fR might remain. Should you encounter
|
||||
some issues, check the bugtracker: https://github.com/jvoisin/mat2/issues
|
||||
.PP
|
||||
Please use accordingly and be careful.
|
||||
|
||||
.SH AUTHOR
|
||||
This software was made by Julien (jvoisin) Voisin with the support of the Tails project.
|
||||
|
||||
.SH COPYRIGHT
|
||||
This software is released under LGPLv3.
|
||||
|
||||
.SH "SEE ALSO"
|
||||
.BR exiftool (1p)
|
||||
.BR pdf-redact-tools (1)
|
@@ -3,7 +3,7 @@ Threat Model
|
||||
|
||||
The Metadata Anonymisation Toolkit 2 adversary has a number
|
||||
of goals, capabilities, and counter-attack types that can be
|
||||
used to guide us towards a set of requirements for the MAT2.
|
||||
used to guide us towards a set of requirements for the mat2.
|
||||
|
||||
This is an overhaul of MAT's (the first iteration of the software) one.
|
||||
|
||||
@@ -53,7 +53,7 @@ Adversary
|
||||
user. This is the strongest position for the adversary to
|
||||
have. In this case, the adversary is capable of inserting
|
||||
arbitrary, custom watermarks specifically for tracking
|
||||
the user. In general, MAT2 cannot defend against this
|
||||
the user. In general, mat2 cannot defend against this
|
||||
adversary, but we list it for completeness' sake.
|
||||
|
||||
- The adversary created the document for a group of users.
|
||||
@@ -65,7 +65,7 @@ Adversary
|
||||
- The adversary did not create the document, the weakest
|
||||
position for the adversary to have. The file format is
|
||||
(most of the time) standard, nothing custom is added:
|
||||
MAT2 must be able to remove all metadata from the file.
|
||||
mat2 must be able to remove all metadata from the file.
|
||||
|
||||
|
||||
Requirements
|
||||
@@ -73,28 +73,28 @@ Requirements
|
||||
|
||||
* Processing
|
||||
|
||||
- MAT2 *should* avoid interactions with information.
|
||||
- mat2 *should* avoid interactions with information.
|
||||
Its goal is to remove metadata, and the user is solely
|
||||
responsible for the information of the file.
|
||||
|
||||
- MAT2 *must* warn when encountering an unknown
|
||||
format. For example, in a zipfile, if MAT encounters an
|
||||
- mat2 *must* warn when encountering an unknown
|
||||
format. For example, in a zipfile, if mat2 encounters an
|
||||
unknown format, it should warn the user, and ask if the
|
||||
file should be added to the anonymised archive that is
|
||||
produced.
|
||||
|
||||
- MAT2 *must* not add metadata, since its purpose is to
|
||||
- mat2 *must* not add metadata, since its purpose is to
|
||||
anonymise files: every added item of metadata decreases
|
||||
anonymity.
|
||||
|
||||
- MAT2 *should* handle unknown/hidden metadata fields,
|
||||
- mat2 *should* handle unknown/hidden metadata fields,
|
||||
like proprietary extensions of open formats.
|
||||
|
||||
- MAT2 *must not* fail silently. Upon failure,
|
||||
MAT2 *must not* modify the file in any way.
|
||||
- mat2 *must not* fail silently. Upon failure,
|
||||
mat2 *must not* modify the file in any way.
|
||||
|
||||
- MAT2 *might* leak the fact that MAT2 was used on the file,
|
||||
- mat2 *might* leak the fact that mat2 was used on the file,
|
||||
since it might be uncommon for some file formats to come
|
||||
without any kind of metadata, an adversary might suspect that
|
||||
the user used MAT2 on certain files.
|
||||
the user used mat2 on certain files.
|
||||
|
||||
|
14
dolphin/README.md
Normal file
@@ -0,0 +1,14 @@
|
||||
Dolphin integration
|
||||
===================
|
||||
|
||||
Thanks to [Miguel Marco](https://riemann.unizar.es/~mmarco/), here is a neat
|
||||
integration for [Dolphin](https://kde.org/applications/system/org.kde.dolphin),
|
||||
the KDE file manager:
|
||||
|
||||
1. Add the `mat2.desktop` file either in
|
||||
- `/usr/share/kservices5/ServiceMenus/` to install it globally
|
||||
- `~/.local/share/kservices5/ServiceMenus/` for a specific user
|
||||
2. Run `kbuildsycoca5` to update the corresponding database
|
||||
3. Enjoy your new contextual menu to remove metadata from your files!
|
||||
|
||||
|
13
dolphin/mat2.desktop
Normal file
@@ -0,0 +1,13 @@
|
||||
[Desktop Entry]
|
||||
X-KDE-ServiceTypes=KonqPopupMenu/Plugin
|
||||
MimeType=application/pdf;application/vnd.oasis.opendocument.chart;application/vnd.oasis.opendocument.formula;application/vnd.oasis.opendocument.graphics;application/vnd.oasis.opendocument.image;application/vnd.oasis.opendocument.presentation;application/vnd.oasis.opendocument.spreadsheet;application/vnd.oasis.opendocument.text;application/vnd.openxmlformats-officedocument.presentationml.presentation;application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;application/vnd.openxmlformats-officedocument.wordprocessingml.document;application/x-bittorrent;application/zip;audio/flac;audio/mpeg;audio/ogg;audio/x-flac;image/jpeg;image/png;image/tiff;image/x-ms-bmp;text/plain;video/mp4;video/x-msvideo;
|
||||
Actions=cleanMetadata;
|
||||
Type=Service
|
||||
|
||||
[Desktop Action cleanMetadata]
|
||||
Name=Clean metadata
|
||||
Name[de]=Metadaten löschen
|
||||
Name[es]=Limpiar metadatos
|
||||
Icon=/usr/share/icons/hicolor/scalable/apps/mat2.svg
|
||||
Exec=kdialog --yesno "$( mat2 -s %F )" --title "Clean Metadata?" && mat2 %U
|
||||
Exec[de]=kdialog --yesno "$( mat2 -s %F )" --title "Metadaten löschen?" && mat2 %U
|
@@ -1,7 +1,13 @@
|
||||
#!/bin/env python3
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import enum
|
||||
import importlib
|
||||
from typing import Dict
|
||||
|
||||
from . import exiftool, video
|
||||
|
||||
# A set of extension that aren't supported, despite matching a supported mimetype
|
||||
unsupported_extensions = {
|
||||
UNSUPPORTED_EXTENSIONS = {
|
||||
'.asc',
|
||||
'.bat',
|
||||
'.brf',
|
||||
@@ -12,10 +18,79 @@ unsupported_extensions = {
|
||||
'.pot',
|
||||
'.rdf',
|
||||
'.srt',
|
||||
'.text',
|
||||
'.txt',
|
||||
'.wsdl',
|
||||
'.xpdl',
|
||||
'.xsd',
|
||||
'.xsl',
|
||||
}
|
||||
|
||||
# Python modules probed by check_dependencies(), keyed by a human-readable
# label. Every entry listed here is flagged as required.
DEPENDENCIES = {
    label: {'module': module_path, 'required': True}
    for label, module_path in (
        ('Cairo', 'cairo'),
        ('PyGobject', 'gi'),
        ('GdkPixbuf from PyGobject', 'gi.repository.GdkPixbuf'),
        ('Poppler from PyGobject', 'gi.repository.Poppler'),
        ('GLib from PyGobject', 'gi.repository.GLib'),
        ('Mutagen', 'mutagen'),
    )
}
|
||||
|
||||
# External command-line tools, keyed by display name. Each 'cmd' value is a
# callable that check_dependencies() invokes; a RuntimeError from it is
# reported as "not found". Both tools are flagged as optional.
CMD_DEPENDENCIES = dict(
    Exiftool={'cmd': exiftool._get_exiftool_path, 'required': False},
    Ffmpeg={'cmd': video._get_ffmpeg_path, 'required': False},
)
|
||||
|
||||
|
||||
def check_dependencies() -> Dict[str, Dict[str, bool]]:
    """Report, for every known dependency, whether it is available.

    Python modules listed in `DEPENDENCIES` are probed by importing them,
    and the tools listed in `CMD_DEPENDENCIES` by calling their `cmd`
    callable.

    :return: a mapping from the dependency's name to a dict with two
        booleans: `found` and `required`.
    """
    report: Dict[str, Dict] = {}

    # Python modules: an ImportError means that the module is missing.
    for name, spec in DEPENDENCIES.items():
        required = spec['required']
        try:
            importlib.import_module(spec['module'])  # type: ignore
        except ImportError:  # pragma: no cover
            available = False
        else:
            available = True
        report[name] = {'found': available, 'required': required}

    # External commands: the callable raises RuntimeError when it fails.
    for name, spec in CMD_DEPENDENCIES.items():
        required = spec['required']
        try:
            spec['cmd']()  # type: ignore
        except RuntimeError:  # pragma: no cover
            available = False
        else:
            available = True
        report[name] = {'found': available, 'required': required}

    return report
|
||||
|
||||
|
||||
@enum.unique
class UnknownMemberPolicy(enum.Enum):
    """What to do with members of an archive whose format is unknown."""
    # Give up on cleaning the whole archive.
    ABORT = 'abort'
    # Leave the unknown member out of the cleaned archive.
    OMIT = 'omit'
    # Copy the unknown member into the cleaned archive.
    KEEP = 'keep'
|
||||
|
@@ -1,27 +1,48 @@
|
||||
import abc
|
||||
import os
|
||||
from typing import Set, Dict
|
||||
|
||||
assert Set # make pyflakes happy
|
||||
import re
|
||||
from typing import Union, Set, Dict
|
||||
|
||||
|
||||
class AbstractParser(abc.ABC):
|
||||
meta_list = set() # type: Set[str]
|
||||
mimetypes = set() # type: Set[str]
|
||||
""" This is the base class of every parser.
|
||||
It might yield `ValueError` on instantiation on invalid files,
|
||||
and `RuntimeError` when something went wrong in `remove_all`.
|
||||
"""
|
||||
meta_list: Set[str] = set()
|
||||
mimetypes: Set[str] = set()
|
||||
|
||||
def __init__(self, filename: str) -> None:
|
||||
"""
|
||||
:raises ValueError: Raised upon an invalid file
|
||||
"""
|
||||
if re.search('^[a-z0-9./]', filename) is None:
|
||||
# Some parsers are calling external binaries,
|
||||
# this prevents shell command injections
|
||||
filename = os.path.join('.', filename)
|
||||
|
||||
self.filename = filename
|
||||
fname, extension = os.path.splitext(filename)
|
||||
|
||||
# Special case for tar.gz, tar.bz2, … files
|
||||
if fname.endswith('.tar') and len(fname) > 4:
|
||||
fname, extension = fname[:-4], '.tar' + extension
|
||||
|
||||
self.output_filename = fname + '.cleaned' + extension
|
||||
self.lightweight_cleaning = False
|
||||
self.sandbox = True
|
||||
|
||||
@abc.abstractmethod
|
||||
def get_meta(self) -> Dict[str, str]:
|
||||
pass # pragma: no cover
|
||||
def get_meta(self) -> Dict[str, Union[str, Dict]]:
|
||||
"""Return all the metadata of the current file
|
||||
|
||||
:raises RuntimeError: Raised if the cleaning process went wrong.
|
||||
"""
|
||||
|
||||
@abc.abstractmethod
|
||||
def remove_all(self) -> bool:
|
||||
pass # pragma: no cover
|
||||
"""
|
||||
Remove all the metadata of the current file
|
||||
|
||||
def remove_all_lightweight(self) -> bool:
|
||||
""" Remove _SOME_ metadata. """
|
||||
return self.remove_all()
|
||||
:raises RuntimeError: Raised if the cleaning process went wrong.
|
||||
"""
|
||||
|
487
libmat2/archive.py
Normal file
@@ -0,0 +1,487 @@
|
||||
import abc
|
||||
import stat
|
||||
import zipfile
|
||||
import datetime
|
||||
import tarfile
|
||||
import tempfile
|
||||
import os
|
||||
import logging
|
||||
import shutil
|
||||
from typing import Pattern, Union, Any, Set, Dict, List
|
||||
|
||||
from . import abstract, UnknownMemberPolicy, parser_factory
|
||||
|
||||
# pylint: disable=not-callable,assignment-from-no-return,too-many-branches
|
||||
|
||||
# ArchiveClass: the type of an opened archive (zip or tar).
# ArchiveMember: the type describing a single entry, usually a file,
# stored inside such an archive.
ArchiveClass = Union[zipfile.ZipFile, tarfile.TarFile]
ArchiveMember = Union[zipfile.ZipInfo, tarfile.TarInfo]
|
||||
|
||||
|
||||
class ArchiveBasedAbstractParser(abstract.AbstractParser):
    """Base class for all archive-based formats.

    Welcome to a world of frustrating complexity and tediousness:
    - A lot of file formats (docx, odt, epubs, …) are archive-based,
      so we need to add callbacks everywhere to allow their respective
      parsers to apply specific cleanup to the required files.
    - Python has two different modules to deal with .tar and .zip files,
      with similar-but-yet-o-so-different API, so we need to write
      a ghetto-wrapper to avoid duplicating everything
    - The combination of @staticmethod and @abstractstaticmethod is
      required because for now, mypy doesn't know that
      @abstractstaticmethod is, indeed, a static method.
    - Mypy is too dumb (yet) to realise that a type A is valid under
      the Union[A, B] constraint, hence the weird `# type: ignore`
      annotations.
    """
    # Tarfiles can optionally support compression
    # https://docs.python.org/3/library/tarfile.html#tarfile.open
    compression = ''

    def __init__(self, filename):
        super().__init__(filename)
        # We ignore typing here because mypy is too stupid
        self.archive_class = None  # type: ignore
        self.member_class = None  # type: ignore

        # Those are the files that have a format that _isn't_
        # supported by mat2, but that we want to keep anyway.
        self.files_to_keep: Set[Pattern] = set()

        # Those are the files that we _do not_ want to keep,
        # no matter if they are supported or not.
        self.files_to_omit: Set[Pattern] = set()

        # what should the parser do if it encounters an unknown file in
        # the archive?
        self.unknown_member_policy: UnknownMemberPolicy = UnknownMemberPolicy.ABORT

        # The LGTM comment is to mask a false-positive,
        # see https://lgtm.com/projects/g/jvoisin/mat2/
        self.is_archive_valid()  # lgtm [py/init-calls-subclass]

    def is_archive_valid(self):
        """Raise a ValueError if the current archive isn't a valid one."""

    def _specific_cleanup(self, full_path: str) -> bool:
        """ This method can be used to apply specific treatment
        to files present in the archive."""
        # pylint: disable=unused-argument
        return True  # pragma: no cover

    def _specific_get_meta(self, full_path: str, file_path: str) -> Dict[str, Any]:
        """ This method can be used to extract specific metadata
        from files present in the archive."""
        # pylint: disable=unused-argument
        return {}  # pragma: no cover

    def _final_checks(self) -> bool:
        """ This method is invoked after the file has been cleaned,
        allowing to run final verifications.
        """
        # pylint: disable=unused-argument
        return True

    @staticmethod
    @abc.abstractmethod
    def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]:
        """Return all the members of the archive."""

    @staticmethod
    @abc.abstractmethod
    def _clean_member(member: ArchiveMember) -> ArchiveMember:
        """Remove all the metadata for a given member."""

    @staticmethod
    @abc.abstractmethod
    def _get_member_meta(member: ArchiveMember) -> Dict[str, str]:
        """Return all the metadata of a given member."""

    @staticmethod
    @abc.abstractmethod
    def _get_member_name(member: ArchiveMember) -> str:
        """Return the name of the given member."""

    @staticmethod
    @abc.abstractmethod
    def _is_dir(member: ArchiveMember) -> bool:
        """Return true if the given member is a directory."""

    @abc.abstractmethod
    def _add_file_to_archive(self, archive: ArchiveClass, member: ArchiveMember,
                             full_path: str):
        """Add the file at full_path to the archive, via the given member."""

    @staticmethod
    def _set_member_permissions(member: ArchiveMember, permissions: int) -> ArchiveMember:
        """Set the permission of the archive member."""
        # pylint: disable=unused-argument
        return member

    @staticmethod
    def _get_member_compression(member: ArchiveMember):
        """Get the compression of the archive member."""
        # pylint: disable=unused-argument
        return None

    @staticmethod
    def _set_member_compression(member: ArchiveMember, compression) -> ArchiveMember:
        """Set the compression of the archive member."""
        # pylint: disable=unused-argument
        return member

    @staticmethod
    def _is_inside_folder(folder: str, path: str) -> bool:
        """Return True if `path` is `folder` itself or is located below it.

        A bare `startswith(folder)` is not enough: `/tmp/abc-evil/x` starts
        with `/tmp/abc` without being inside of it, so the comparison is
        done against the folder followed by a path separator.
        """
        folder = os.path.abspath(folder)
        path = os.path.abspath(path)
        return path == folder or path.startswith(folder.rstrip(os.sep) + os.sep)

    def get_meta(self) -> Dict[str, Union[str, Dict]]:
        """Return the metadata of every member of the archive.

        The result maps each member's name to a dict merging the metadata
        of the archive entry itself, the format-specific ones returned by
        `_specific_get_meta`, and the ones found by the member's own
        parser, if any. Members without metadata are left out.
        Processing stops at the first member pointing outside of the
        extraction folder.
        """
        meta: Dict[str, Union[str, Dict]] = dict()

        with self.archive_class(self.filename) as zin:
            temp_folder = tempfile.mkdtemp()

            # The try/finally makes sure that the extracted files don't
            # linger on the disk if something raises midway.
            try:
                for item in self._get_all_members(zin):
                    local_meta = self._get_member_meta(item)
                    member_name = self._get_member_name(item)

                    if self._is_dir(item):  # pragma: no cover
                        continue  # don't keep empty folders

                    full_path = os.path.join(temp_folder, member_name)
                    if not self._is_inside_folder(temp_folder, full_path):
                        logging.error("%s contains a file (%s) pointing outside (%s) of its root.",
                                      self.filename, member_name, full_path)
                        break

                    try:
                        zin.extract(member=item, path=temp_folder)
                    except OSError as e:
                        logging.error("Unable to extract %s from %s: %s", item, self.filename, e)
                        if not os.path.isfile(full_path):
                            # Nothing landed on the disk: there is nothing
                            # to inspect, but the metadata of the archive
                            # entry itself are still worth reporting.
                            if local_meta:
                                meta[member_name] = local_meta
                            continue

                    os.chmod(full_path, stat.S_IRUSR)

                    specific_meta = self._specific_get_meta(full_path, member_name)
                    local_meta = {**local_meta, **specific_meta}

                    member_parser, _ = parser_factory.get_parser(full_path)  # type: ignore
                    if member_parser:
                        member_parser.sandbox = self.sandbox
                        local_meta = {**local_meta, **member_parser.get_meta()}

                    if local_meta:
                        meta[member_name] = local_meta
            finally:
                shutil.rmtree(temp_folder)

        return meta

    def remove_all(self) -> bool:
        """Write a cleaned copy of the archive to `self.output_filename`.

        Every member is extracted to a temporary folder, cleaned with its
        own parser, then added to the output archive. Directories are
        dropped, and members are inserted in lexicographic order, with
        `mimetype` first.

        :return: False if something went wrong, in which case the output
            file is removed; True otherwise.
        """
        # pylint: disable=too-many-branches

        with self.archive_class(self.filename) as zin,\
             self.archive_class(self.output_filename, 'w' + self.compression) as zout:

            temp_folder = tempfile.mkdtemp()
            abort = False

            # The try/finally makes sure that the extracted files don't
            # linger on the disk if something raises midway.
            try:
                # Sort the items to process, to reduce fingerprinting,
                # and keep them in the `items` variable.
                items: List[ArchiveMember] = list()
                for item in sorted(self._get_all_members(zin), key=self._get_member_name):
                    # Some fileformats do require to have the `mimetype` file
                    # as the first file in the archive.
                    if self._get_member_name(item) == 'mimetype':
                        items.insert(0, item)
                    else:
                        items.append(item)

                # Since files order is a fingerprint factor,
                # we're iterating (and thus inserting) them in lexicographic order.
                for item in items:
                    member_name = self._get_member_name(item)
                    if self._is_dir(item):
                        continue  # don't keep empty folders

                    full_path = os.path.join(temp_folder, member_name)
                    if not self._is_inside_folder(temp_folder, full_path):
                        logging.error("%s contains a file (%s) pointing outside (%s) of its root.",
                                      self.filename, member_name, full_path)
                        abort = True
                        break

                    zin.extract(member=item, path=temp_folder)

                    try:
                        original_permissions = os.stat(full_path).st_mode
                    except FileNotFoundError:
                        logging.error("Something went wrong during processing of "
                                      "%s in %s, likely a path traversal attack.",
                                      member_name, self.filename)
                        abort = True
                        # we're breaking instead of continuing, because this exception
                        # is raised in case of weird path-traversal-like attacks.
                        break

                    # Make sure that the extracted file can be read and rewritten.
                    os.chmod(full_path, original_permissions | stat.S_IWUSR | stat.S_IRUSR)

                    original_compression = self._get_member_compression(item)

                    if self._specific_cleanup(full_path) is False:
                        logging.warning("Something went wrong during deep cleaning of %s in %s",
                                        member_name, self.filename)
                        abort = True
                        continue

                    if any(r.search(member_name) for r in self.files_to_keep):
                        # those files aren't supported, but we want to add them anyway
                        pass
                    elif any(r.search(member_name) for r in self.files_to_omit):
                        continue
                    else:  # supported files that we want to first clean, then add
                        member_parser, mtype = parser_factory.get_parser(full_path)  # type: ignore
                        if not member_parser:
                            if self.unknown_member_policy == UnknownMemberPolicy.OMIT:
                                logging.warning("In file %s, omitting unknown element %s (format: %s)",
                                                self.filename, member_name, mtype)
                                continue
                            elif self.unknown_member_policy == UnknownMemberPolicy.KEEP:
                                logging.warning("In file %s, keeping unknown element %s (format: %s)",
                                                self.filename, member_name, mtype)
                            else:
                                logging.error("In file %s, element %s's format (%s) "
                                              "isn't supported",
                                              self.filename, member_name, mtype)
                                abort = True
                                continue
                        else:
                            member_parser.sandbox = self.sandbox
                            if member_parser.remove_all() is False:
                                logging.warning("In file %s, something went wrong "
                                                "with the cleaning of %s "
                                                "(format: %s)",
                                                self.filename, member_name, mtype)
                                abort = True
                                continue
                            # Replace the extracted file with its cleaned version.
                            os.rename(member_parser.output_filename, full_path)

                    # Build a brand new member, so that nothing but the name,
                    # permissions and compression is carried over.
                    zinfo = self.member_class(member_name)  # type: ignore
                    zinfo = self._set_member_permissions(zinfo, original_permissions)
                    zinfo = self._set_member_compression(zinfo, original_compression)
                    clean_zinfo = self._clean_member(zinfo)
                    self._add_file_to_archive(zout, clean_zinfo, full_path)
            finally:
                shutil.rmtree(temp_folder)

        if abort:
            os.remove(self.output_filename)
            return False
        if not self._final_checks():
            return False  # pragma: no cover
        return True
|
||||
|
||||
|
||||
class TarParser(ArchiveBasedAbstractParser):
    """Parser for tar archives.

    The archive is vetted by `is_archive_valid` (absolute paths, path
    traversals, duplicates, setuid/setgid files, escaping symlinks, devices
    and hardlinks are refused), and members get their ownership and
    timestamp fields reset when cleaned.
    """
    mimetypes = {'application/x-tar'}

    def __init__(self, filename):
        super().__init__(filename)
        # yes, it's tarfile.open and not tarfile.TarFile,
        # as stated in the documentation:
        # https://docs.python.org/3/library/tarfile.html#tarfile.TarFile
        # This is required to support compressed archives.
        self.archive_class = tarfile.open
        self.member_class = tarfile.TarInfo

    def is_archive_valid(self):
        """Raise ValueError if the file isn't a tar archive, or is unsafe."""
        if tarfile.is_tarfile(self.filename) is False:
            raise ValueError
        self.__check_tarfile_safety()

    @staticmethod
    def __climbs_out(path: str) -> bool:
        """Tell whether `path`, once normalised, points above its root."""
        normalised = os.path.normpath(path)
        # A bare '..' escapes just like '../x'; a name such as '..foo'
        # does not escape anything and must not be refused.
        return normalised == '..' or normalised.startswith('../')

    def __check_tarfile_safety(self):
        """Checks if the tarfile doesn't have any "suspicious" members.

        This is a rewrite of this patch: https://bugs.python.org/file47826/safetarfile-4.diff
        inspired by this bug from 2014: https://bugs.python.org/issue21109
        because Python's stdlib doesn't provide a way to "safely" extract
        things from a tar file.

        Raises:
            ValueError: on the first suspicious member found.
        """
        names = set()
        with tarfile.open(self.filename) as f:
            members = f.getmembers()
        for member in members:
            name = member.name
            if os.path.isabs(name):
                raise ValueError("The archive %s contains a file with an " \
                        "absolute path: %s" % (self.filename, name))
            elif self.__climbs_out(name) or '/../' in name:
                raise ValueError("The archive %s contains a file with an " \
                        "path traversal attack: %s" % (self.filename, name))

            if name in names:
                raise ValueError("The archive %s contains two times the same " \
                        "file: %s" % (self.filename, name))
            else:
                names.add(name)

            if member.isfile():
                if member.mode & stat.S_ISUID:
                    raise ValueError("The archive %s contains a setuid file: %s" % \
                            (self.filename, name))
                elif member.mode & stat.S_ISGID:
                    raise ValueError("The archive %s contains a setgid file: %s" % \
                            (self.filename, name))
            elif member.issym():
                linkname = member.linkname
                if self.__climbs_out(linkname):
                    # The two literals used to be glued together without
                    # a space ("pointingoutside").
                    raise ValueError('The archive %s contains a symlink pointing ' \
                            'outside of the archive via a path traversal: %s -> %s' % \
                            (self.filename, name, linkname))
                if os.path.isabs(linkname):
                    raise ValueError('The archive %s contains a symlink pointing ' \
                            'outside of the archive: %s -> %s' % \
                            (self.filename, name, linkname))
            elif member.isdev():
                raise ValueError("The archive %s contains a non-regular " \
                        "file: %s" % (self.filename, name))
            elif member.islnk():
                raise ValueError("The archive %s contains a hardlink: %s" \
                        % (self.filename, name))

    @staticmethod
    def _clean_member(member: ArchiveMember) -> ArchiveMember:
        """Zero the mtime/uid/gid and blank the uname/gname of `member`."""
        assert isinstance(member, tarfile.TarInfo)  # please mypy
        member.mtime = member.uid = member.gid = 0
        member.uname = member.gname = ''
        return member

    @staticmethod
    def _get_member_meta(member: ArchiveMember) -> Dict[str, str]:
        """Return the fields of `member` that `_clean_member` would reset,
        skipping those already at their cleaned value."""
        assert isinstance(member, tarfile.TarInfo)  # please mypy
        metadata = {}
        if member.mtime != 0:
            metadata['mtime'] = str(datetime.datetime.fromtimestamp(member.mtime))
        if member.uid != 0:
            metadata['uid'] = str(member.uid)
        if member.gid != 0:
            metadata['gid'] = str(member.gid)
        if member.uname != '':
            metadata['uname'] = member.uname
        if member.gname != '':
            metadata['gname'] = member.gname
        return metadata

    def _add_file_to_archive(self, archive: ArchiveClass, member: ArchiveMember,
                             full_path: str):
        """Add the file at `full_path` to `archive` under `member.name`,
        passing it through `_clean_member` on the way."""
        assert isinstance(member, tarfile.TarInfo)  # please mypy
        assert isinstance(archive, tarfile.TarFile)  # please mypy
        archive.add(full_path, member.name, filter=TarParser._clean_member)  # type: ignore

    @staticmethod
    def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]:
        """Return every member of `archive`."""
        assert isinstance(archive, tarfile.TarFile)  # please mypy
        return archive.getmembers()  # type: ignore

    @staticmethod
    def _get_member_name(member: ArchiveMember) -> str:
        """Return the name of `member` inside the archive."""
        assert isinstance(member, tarfile.TarInfo)  # please mypy
        return member.name

    @staticmethod
    def _set_member_permissions(member: ArchiveMember, permissions: int) -> ArchiveMember:
        """Set the mode bits of `member` to `permissions` and return it."""
        assert isinstance(member, tarfile.TarInfo)  # please mypy
        member.mode = permissions
        return member

    @staticmethod
    def _is_dir(member: ArchiveMember) -> bool:
        """Tell whether `member` is a directory."""
        assert isinstance(member, tarfile.TarInfo)  # please mypy
        return member.isdir()
|
||||
|
||||
|
||||
class TarGzParser(TarParser):
    """Tar parser variant registered for gzip-compressed archives."""
    mimetypes = {'application/x-tar+gz'}
    # NOTE(review): presumably appended to the tarfile mode by the base
    # archive parser; the consumer isn't visible here.
    compression = ':gz'
|
||||
|
||||
|
||||
class TarBz2Parser(TarParser):
    """Tar parser variant registered for bzip2-compressed archives."""
    mimetypes = {'application/x-tar+bz2'}
    # NOTE(review): presumably appended to the tarfile mode by the base
    # archive parser; the consumer isn't visible here.
    compression = ':bz2'
|
||||
|
||||
|
||||
class TarXzParser(TarParser):
    """Tar parser variant registered for xz-compressed archives."""
    mimetypes = {'application/x-tar+xz'}
    # NOTE(review): presumably appended to the tarfile mode by the base
    # archive parser; the consumer isn't visible here.
    compression = ':xz'
|
||||
|
||||
|
||||
class ZipParser(ArchiveBasedAbstractParser):
    """Parser for zip archives.

    Cleaning a member resets its creating system, comment and timestamp.
    """
    mimetypes = {'application/zip'}

    def __init__(self, filename: str):
        super().__init__(filename)
        self.archive_class = zipfile.ZipFile
        self.member_class = zipfile.ZipInfo

    def is_archive_valid(self):
        """Raise ValueError if the file can't be opened as a zip archive."""
        try:
            with zipfile.ZipFile(self.filename):
                pass
        except (zipfile.BadZipFile, OSError) as e:
            # Keep the original error attached, to ease debugging.
            raise ValueError from e

    @staticmethod
    def _clean_member(member: ArchiveMember) -> ArchiveMember:
        """Reset the creating system, comment and timestamp of `member`."""
        assert isinstance(member, zipfile.ZipInfo)  # please mypy
        member.create_system = 3  # Linux
        member.comment = b''
        member.date_time = (1980, 1, 1, 0, 0, 0)  # this is as early as a zipfile can be
        return member

    @staticmethod
    def _get_member_meta(member: ArchiveMember) -> Dict[str, str]:
        """Return the fields of `member` that `_clean_member` would reset,
        skipping those already at their cleaned value."""
        assert isinstance(member, zipfile.ZipInfo)  # please mypy
        metadata = {}
        if member.create_system == 3:  # this is Linux
            pass
        elif member.create_system in (0, 10):
            # In the zip "version made by" table, 0 is MS-DOS/FAT (what
            # Python's zipfile writes on win32) and 10 is Windows NTFS;
            # 2, which used to be reported as Windows, is OpenVMS.
            metadata['create_system'] = 'Windows'
        else:
            metadata['create_system'] = 'Weird'

        if member.comment:
            metadata['comment'] = member.comment  # type: ignore

        if member.date_time != (1980, 1, 1, 0, 0, 0):
            metadata['date_time'] = str(datetime.datetime(*member.date_time))

        return metadata

    def _add_file_to_archive(self, archive: ArchiveClass, member: ArchiveMember,
                             full_path: str):
        """Write the content of `full_path` into `archive` as `member`,
        using the member's own compression type."""
        assert isinstance(archive, zipfile.ZipFile)  # please mypy
        assert isinstance(member, zipfile.ZipInfo)  # please mypy
        with open(full_path, 'rb') as f:
            archive.writestr(member, f.read(),
                             compress_type=member.compress_type)

    @staticmethod
    def _get_all_members(archive: ArchiveClass) -> List[ArchiveMember]:
        """Return every member of `archive`."""
        assert isinstance(archive, zipfile.ZipFile)  # please mypy
        return archive.infolist()  # type: ignore

    @staticmethod
    def _get_member_name(member: ArchiveMember) -> str:
        """Return the name of `member` inside the archive."""
        assert isinstance(member, zipfile.ZipInfo)  # please mypy
        return member.filename

    @staticmethod
    def _get_member_compression(member: ArchiveMember):
        """Return the compression type of `member`."""
        assert isinstance(member, zipfile.ZipInfo)  # please mypy
        return member.compress_type

    @staticmethod
    def _set_member_compression(member: ArchiveMember, compression) -> ArchiveMember:
        """Set the compression type of `member` and return it."""
        assert isinstance(member, zipfile.ZipInfo)  # please mypy
        member.compress_type = compression
        return member

    @staticmethod
    def _is_dir(member: ArchiveMember) -> bool:
        """Tell whether `member` is a directory."""
        assert isinstance(member, zipfile.ZipInfo)  # please mypy
        return member.is_dir()
|
@@ -1,32 +1,54 @@
|
||||
import mimetypes
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
from typing import Union, Dict
|
||||
|
||||
import mutagen
|
||||
|
||||
from . import abstract
|
||||
from . import abstract, parser_factory, video
|
||||
|
||||
|
||||
class MutagenParser(abstract.AbstractParser):
    """Audio parser relying on `mutagen.File` to read and drop tags."""

    def __init__(self, filename):
        super().__init__(filename)
        # Fail early: a file mutagen can't open or identify is refused.
        try:
            handle = mutagen.File(self.filename)
        except mutagen.MutagenError:
            raise ValueError
        if handle is None:
            raise ValueError

    def get_meta(self) -> Dict[str, Union[str, Dict]]:
        """Return the tags of the file, each value list joined with ', '."""
        tags = mutagen.File(self.filename).tags
        if not tags:
            return {}
        return {name: ', '.join(map(str, values))
                for name, values in tags.items()}

    def remove_all(self) -> bool:
        """Copy the file to `output_filename` and delete the copy's tags.

        Raises:
            ValueError: when mutagen fails to delete or save.
        """
        shutil.copy(self.filename, self.output_filename)
        copy = mutagen.File(self.output_filename)
        try:
            copy.delete()
            copy.save()
        except mutagen.MutagenError:
            raise ValueError
        return True
|
||||
|
||||
|
||||
class MP3Parser(MutagenParser):
    """Parser for MPEG audio files."""
    mimetypes = {'audio/mpeg', }

    def get_meta(self) -> Dict[str, Union[str, Dict]]:
        """Return the textual tags of the file.

        Tuple keys are reported as a key/value pair; tags lacking a `text`
        attribute are skipped.
        """
        found: Dict[str, Union[str, Dict]] = dict()
        tags = mutagen.File(self.filename).tags
        if tags:
            for key in tags:
                if isinstance(key, tuple):
                    found[key[0]] = key[1]
                elif hasattr(tags[key], 'text'):
                    # Keys may carry trailing blanks or NUL bytes.
                    cleaned_key = key.rstrip(' \t\r\n\0')
                    found[cleaned_key] = ', '.join(map(str, tags[key].text))
        return found
|
||||
|
||||
@@ -36,4 +58,57 @@ class OGGParser(MutagenParser):
|
||||
|
||||
|
||||
class FLACParser(MutagenParser):
    """Parser for FLAC files, which may embed pictures next to their tags."""
    mimetypes = {'audio/flac', 'audio/x-flac'}

    def remove_all(self) -> bool:
        """Copy the file to `output_filename`, then drop the copy's
        pictures and tags."""
        shutil.copy(self.filename, self.output_filename)
        f = mutagen.File(self.output_filename)
        f.clear_pictures()
        f.delete()
        f.save(deleteid3=True)
        return True

    def get_meta(self) -> Dict[str, Union[str, Dict]]:
        """Return the tags of the file, plus the metadata of each embedded
        picture, keyed by the picture's description (or 'Cover <n>').

        Raises:
            ValueError: when no parser is available for a picture.
        """
        meta = super().get_meta()
        for num, picture in enumerate(mutagen.File(self.filename).pictures):
            name = picture.desc if picture.desc else 'Cover %d' % num
            extension = mimetypes.guess_extension(picture.mime)
            if extension is None:  # pragma: no cover
                meta[name] = 'harmful data'
                continue

            # The picture is dumped to disk so that a regular parser can
            # inspect it. The descriptor is handed to the file object
            # (which closes it), and the file is removed on every path.
            fd, fname = tempfile.mkstemp(suffix=extension)
            try:
                with os.fdopen(fd, 'wb') as f:
                    f.write(picture.data)
                p, _ = parser_factory.get_parser(fname)  # type: ignore
                if p is None:
                    raise ValueError
                p.sandbox = self.sandbox
                meta[name] = p.get_meta()
            finally:
                os.remove(fname)
        return meta
|
||||
|
||||
|
||||
class WAVParser(video.AbstractFFmpegParser):
    """Parser for WAV audio files."""
    mimetypes = {'audio/x-wav', }
    # NOTE(review): presumably the exiftool keys that aren't reported as
    # metadata; the consumer lives in the base class, not visible here.
    meta_allowlist = {
        'AvgBytesPerSec',
        'BitsPerSample',
        'Directory',
        'Duration',
        'Encoding',
        'ExifToolVersion',
        'FileAccessDate',
        'FileInodeChangeDate',
        'FileModifyDate',
        'FileName',
        'FilePermissions',
        'FileSize',
        'FileType',
        'FileTypeExtension',
        'MIMEType',
        'NumChannels',
        'SampleRate',
        'SourceFile',
    }
|
||||
|
||||
|
||||
class AIFFParser(video.AbstractFFmpegParser):
    """Parser for AIFF audio files."""
    mimetypes = {'audio/aiff', 'audio/x-aiff'}
    # NOTE(review): presumably the exiftool keys that aren't reported as
    # metadata; the consumer lives in the base class, not visible here.
    meta_allowlist = {
        'AvgBytesPerSec',
        'BitsPerSample',
        'Directory',
        'Duration',
        'Encoding',
        'ExifToolVersion',
        'FileAccessDate',
        'FileInodeChangeDate',
        'FileModifyDate',
        'FileName',
        'FilePermissions',
        'FileSize',
        'FileType',
        'FileTypeExtension',
        'MIMEType',
        'NumChannels',
        'NumSampleFrames',
        'SampleRate',
        'SampleSize',
        'SourceFile',
    }
|
||||
|
113
libmat2/bubblewrap.py
Normal file
@@ -0,0 +1,113 @@
|
||||
"""
|
||||
Wrapper around a subset of the subprocess module,
|
||||
that uses bwrap (bubblewrap) when it is available.
|
||||
|
||||
Instead of importing subprocess, other modules should use this as follows:
|
||||
|
||||
from . import bubblewrap
|
||||
"""
|
||||
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import tempfile
|
||||
import functools
|
||||
from typing import Optional, List
|
||||
|
||||
|
||||
__all__ = ['PIPE', 'run', 'CalledProcessError']
|
||||
PIPE = subprocess.PIPE
|
||||
CalledProcessError = subprocess.CalledProcessError
|
||||
|
||||
# pylint: disable=subprocess-run-check
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=None)
def _get_bwrap_path() -> str:
    """Return the location of the `bwrap` executable.

    The result of the lookup is memoised by `lru_cache`.

    Raises:
        RuntimeError: when `shutil.which` finds no `bwrap`.
    """
    located = shutil.which('bwrap')
    if not located:
        raise RuntimeError("Unable to find bwrap")  # pragma: no cover
    return located
|
||||
|
||||
|
||||
def _get_bwrap_args(tempdir: str,
                    input_filename: str,
                    output_filename: Optional[str] = None) -> List[str]:
    """Build the bubblewrap command-line options for one sandboxed run.

    Args:
        tempdir: directory bound, read-write, over the parent directory
            of `output_filename`.
        input_filename: file bound read-only at its absolute path.
        output_filename: when given, its parent directory is replaced by
            `tempdir` inside the sandbox.

    Returns:
        The options to insert between the `bwrap` executable and the
        command to run.
    """
    workdir = os.getcwd()

    # XXX: use --ro-bind-try once all supported platforms
    # have a bubblewrap recent enough to support it.
    system_dirs = ['/usr', '/lib', '/lib64', '/bin', '/sbin', '/etc/alternatives', workdir]
    system_files = ['/etc/ld.so.cache']
    # Only what exists on this host can be bound.
    readonly_paths = [d for d in system_dirs if os.path.isdir(d)]  # pragma: no cover
    readonly_paths += [f for f in system_files if os.path.isfile(f)]  # pragma: no cover

    args = []
    for path in readonly_paths:
        args += ['--ro-bind', path, path]

    args += ['--dev', '/dev',
             '--proc', '/proc',
             '--chdir', workdir,
             '--unshare-user-try',
             '--unshare-ipc',
             '--unshare-pid',
             '--unshare-net',
             '--unshare-uts',
             '--unshare-cgroup-try',
             '--new-session',
             '--cap-drop', 'all',
             # XXX: enable --die-with-parent once all supported platforms have
             # a bubblewrap recent enough to support it.
             # '--die-with-parent',
             ]

    if output_filename:
        # Mount an empty temporary directory where the sandboxed
        # process will create its output file
        parent = os.path.dirname(os.path.abspath(output_filename))
        args += ['--bind', tempdir, parent]

    source = os.path.abspath(input_filename)
    args += ['--ro-bind', source, source]

    return args
|
||||
|
||||
|
||||
def run(args: List[str],
        input_filename: str,
        output_filename: Optional[str] = None,
        **kwargs) -> subprocess.CompletedProcess:
    """Run `args` like `subprocess.run`, inside bubblewrap when available.

    Args:
        args: the command to execute.
        input_filename: file made available read-only in the sandbox.
        output_filename: optional; an empty temporary directory is shown
            to the sandboxed process as the parent directory of this file,
            and the file created there is copied to `output_filename`
            when the process exits with status 0. It can be left out when
            the process only inspects the input.
        **kwargs: forwarded to `subprocess.run`.

    Returns:
        The `CompletedProcess` of the (possibly sandboxed) run.
    """
    try:
        bwrap = _get_bwrap_path()
    except RuntimeError:  # pragma: no cover
        # bubblewrap is not installed ⇒ short-circuit
        return subprocess.run(args, **kwargs)

    with tempfile.TemporaryDirectory() as tempdir:
        sandbox_args = _get_bwrap_args(input_filename=input_filename,
                                       output_filename=output_filename,
                                       tempdir=tempdir)
        command = [bwrap] + sandbox_args + args
        completed_process = subprocess.run(command, **kwargs)
        succeeded = completed_process.returncode == 0
        if output_filename and succeeded:
            produced = os.path.join(tempdir, os.path.basename(output_filename))
            shutil.copy(produced, output_filename)

    return completed_process
|
115
libmat2/epub.py
Normal file
@@ -0,0 +1,115 @@
|
||||
import logging
|
||||
import re
|
||||
import uuid
|
||||
import zipfile
|
||||
import xml.etree.ElementTree as ET # type: ignore
|
||||
from typing import Any, Dict
|
||||
|
||||
from . import archive, office
|
||||
|
||||
|
||||
class EPUBParser(archive.ZipParser):
    """Parser for EPUB books, which are zip archives with XML manifests."""
    mimetypes = {'application/epub+zip', }
    metadata_namespace = '{http://purl.org/dc/elements/1.1/}'

    def __init__(self, filename):
        super().__init__(filename)
        # These are regular expressions: the dots are escaped so that they
        # only match a literal '.', and not any character.
        self.files_to_keep = set(map(re.compile, {  # type: ignore
            r'META-INF/container\.xml',
            r'mimetype',
            r'OEBPS/content\.opf',
            r'content\.opf',
            r'hmh\.opf',
            r'OPS/.+\.xml'
        }))
        self.files_to_omit = set(map(re.compile, {  # type: ignore
            r'iTunesMetadata\.plist',
            r'META-INF/calibre_bookmarks\.txt',
            r'OEBPS/package\.opf',
        }))
        # Identifier written into the cleaned metadata block.
        self.uniqid = uuid.uuid4()

    def is_archive_valid(self):
        """Raise ValueError for invalid zips and for books shipping a
        `META-INF/encryption.xml` member."""
        super().is_archive_valid()
        with zipfile.ZipFile(self.filename) as zin:
            for item in self._get_all_members(zin):
                member_name = self._get_member_name(item)
                if member_name.endswith('META-INF/encryption.xml'):
                    raise ValueError('the file contains encrypted fonts')

    def _specific_get_meta(self, full_path, file_path) -> Dict[str, Any]:
        """Return the `meta`/`dc`/`cp` elements found in an `.opf` file.

        Files with another extension yield no metadata; undecodable ones
        are reported as harmful content.
        """
        if not file_path.endswith('.opf'):
            return {}

        with open(full_path, encoding='utf-8') as f:
            try:
                results = re.findall(r"<((?:meta|dc|cp).+?)[^>]*>(.+)</\1>",
                                     f.read(), re.I|re.M)
                return dict(results)
            except (TypeError, UnicodeDecodeError):
                return {file_path: 'harmful content', }

    def _specific_cleanup(self, full_path: str) -> bool:
        """Dispatch `full_path` to its dedicated cleaner, if it has one."""
        if full_path.endswith(('hmh.opf', 'content.opf')):
            return self.__handle_contentopf(full_path)
        elif full_path.endswith('OEBPS/toc.ncx'):
            return self.__handle_tocncx(full_path)
        elif re.search(r'/OPS/[^/]+\.xml$', full_path):
            return self.__handle_ops_xml(full_path)
        return True

    def __handle_ops_xml(self, full_path: str) -> bool:
        """Empty the first element whose tag ends with 'head', in place."""
        try:
            tree, namespace = office._parse_xml(full_path)
        except ET.ParseError:  # pragma: nocover
            logging.error("Unable to parse %s in %s.", full_path, self.filename)
            return False

        for item in tree.iterfind('.//', namespace):  # pragma: nocover
            if item.tag.strip().lower().endswith('head'):
                item.clear()
                break
        tree.write(full_path, xml_declaration=True, encoding='utf-8',
                   short_empty_elements=False)
        return True

    def __handle_tocncx(self, full_path: str) -> bool:
        """Replace the content of the first element whose tag ends with
        'head' by a single empty `meta` element, in place."""
        try:
            tree, namespace = office._parse_xml(full_path)
        except ET.ParseError:  # pragma: nocover
            logging.error("Unable to parse %s in %s.", full_path, self.filename)
            return False

        for item in tree.iterfind('.//', namespace):  # pragma: nocover
            if item.tag.strip().lower().endswith('head'):
                item.clear()
                ET.SubElement(item, 'meta', attrib={'name': '', 'content': ''})
                break
        tree.write(full_path, xml_declaration=True, encoding='utf-8',
                   short_empty_elements=False)
        return True

    def __handle_contentopf(self, full_path: str) -> bool:
        """Replace the content of the metadata block by a fresh identifier
        and empty language/title elements, in place."""
        try:
            tree, namespace = office._parse_xml(full_path)
        except ET.ParseError:
            logging.error("Unable to parse %s in %s.", full_path, self.filename)
            return False

        for item in tree.iterfind('.//', namespace):  # pragma: nocover
            if item.tag.strip().lower().endswith('metadata'):
                item.clear()

                # item with mandatory content
                uniqid = ET.Element(self.metadata_namespace + 'identifier')
                uniqid.text = str(self.uniqid)
                uniqid.set('id', 'id')
                item.append(uniqid)

                # items without mandatory content
                for name in ['language', 'title']:
                    item.append(ET.Element(self.metadata_namespace + name))
                break  # there is only a single <metadata> block
        tree.write(full_path, xml_declaration=True, encoding='utf-8')
        return True
|
80
libmat2/exiftool.py
Normal file
@@ -0,0 +1,80 @@
|
||||
import functools
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
from typing import Union, Set, Dict
|
||||
|
||||
from . import abstract
|
||||
from . import bubblewrap
|
||||
|
||||
|
||||
class ExiftoolParser(abstract.AbstractParser):
    """ Exiftool is often the easiest way to get all the metadata
    from a file, hence why several parsers are re-using its `get_meta`
    method.
    """
    # Keys of exiftool's report that `get_meta` leaves out of its result.
    meta_allowlist: Set[str] = set()

    def get_meta(self) -> Dict[str, Union[str, Dict]]:
        """Return exiftool's JSON report for the file, minus the keys
        listed in `meta_allowlist`.

        Raises:
            ValueError: when exiftool exits with an error.
        """
        try:
            cmd = [_get_exiftool_path(), '-json', self.filename]
            if self.sandbox:
                out = bubblewrap.run(cmd,
                                     input_filename=self.filename,
                                     check=True, stdout=subprocess.PIPE).stdout
            else:
                out = subprocess.run(cmd,
                                     check=True, stdout=subprocess.PIPE).stdout
        except subprocess.CalledProcessError:  # pragma: no cover
            raise ValueError
        meta = json.loads(out.decode('utf-8'))[0]
        for key in self.meta_allowlist:
            meta.pop(key, None)
        return meta

    def _lightweight_cleanup(self) -> bool:
        """Have exiftool write a metadata-stripped copy of the file to
        `output_filename`.

        Returns:
            False when a pre-existing output file can't be removed or when
            exiftool fails, True otherwise.
        """
        if os.path.exists(self.output_filename):
            try:  # exiftool can't force output to existing files
                os.remove(self.output_filename)
            except OSError as e:  # pragma: no cover
                # The message is about the output file, so that's the
                # path to report.
                logging.error("The output file %s is already existing and "
                              "can't be overwritten: %s.",
                              self.output_filename, e)
                return False

        # Note: '-All=' must be followed by a known exiftool option.
        # Also, '-CommonIFD0' is needed for .tiff files
        cmd = [_get_exiftool_path(),
               '-all=',         # remove metadata
               '-adobe=',       # remove adobe-specific metadata
               '-exif:all=',    # remove all exif metadata
               '-Time:All=',    # remove all timestamps
               '-quiet',        # don't show useless logs
               '-CommonIFD0=',  # remove IFD0 metadata
               '-o', self.output_filename,
               self.filename]
        try:
            if self.sandbox:
                bubblewrap.run(cmd, check=True,
                               input_filename=self.filename,
                               output_filename=self.output_filename)
            else:
                subprocess.run(cmd, check=True)
        except subprocess.CalledProcessError as e:  # pragma: no cover
            logging.error("Something went wrong during the processing of %s: %s", self.filename, e)
            return False
        return True
|
||||
|
||||
@functools.lru_cache(maxsize=None)
def _get_exiftool_path() -> str:  # pragma: no cover
    """Return the location of the `exiftool` executable.

    The result of the lookup is memoised by `lru_cache`.

    Raises:
        RuntimeError: when exiftool is neither found by `shutil.which`
            nor executable at its Arch Linux location.
    """
    # Exiftool on Arch Linux has a weird path
    arch_linux_path = '/usr/bin/vendor_perl/exiftool'

    located = shutil.which('exiftool')
    if located:
        return located
    if not os.access(arch_linux_path, os.X_OK):
        raise RuntimeError("Unable to find exiftool")
    return arch_linux_path
|
@@ -1,18 +1,15 @@
|
||||
from typing import Dict
|
||||
import shutil
|
||||
from typing import Union, Dict
|
||||
from . import abstract
|
||||
|
||||
|
||||
class HarmlessParser(abstract.AbstractParser):
    """ This is the parser for filetypes that can not contain metadata. """
    mimetypes = {'text/plain', 'image/x-ms-bmp', 'image/bmp'}

    def get_meta(self) -> Dict[str, Union[str, Dict]]:
        """There is never anything to report: return an empty dict."""
        return {}

    def remove_all(self) -> bool:
        """Copy the file, unchanged, to `output_filename`."""
        shutil.copy(self.filename, self.output_filename)
        return True
|
||||
|
@@ -1,49 +1,63 @@
|
||||
import subprocess
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
import re
|
||||
from typing import Union, Any, Dict
|
||||
|
||||
import cairo
|
||||
|
||||
import gi
|
||||
gi.require_version('GdkPixbuf', '2.0')
|
||||
from gi.repository import GdkPixbuf
|
||||
gi.require_version('Rsvg', '2.0')
|
||||
from gi.repository import GdkPixbuf, GLib, Rsvg
|
||||
|
||||
from . import abstract
|
||||
from . import exiftool, abstract
|
||||
|
||||
class SVGParser(exiftool.ExiftoolParser):
    """Parser for SVG images: they are cleaned by being re-rendered onto a
    new cairo SVG surface."""
    mimetypes = {'image/svg+xml', }
    meta_allowlist = {'Directory', 'ExifToolVersion', 'FileAccessDate',
                      'FileInodeChangeDate', 'FileModifyDate', 'FileName',
                      'FilePermissions', 'FileSize', 'FileType',
                      'FileTypeExtension', 'ImageHeight', 'ImageWidth',
                      'MIMEType', 'SVGVersion', 'SourceFile', 'ViewBox'
                      }

    def remove_all(self) -> bool:
        """Render the image into a new SVG file at `output_filename`.

        Raises:
            ValueError: when the file can't be loaded, or has no viewbox.
        """
        try:
            svg = Rsvg.Handle.new_from_file(self.filename)
        except GLib.GError:
            raise ValueError

        # The AttributeError fallbacks, here and below, cover handles
        # that lack the newer methods.
        try:
            _, _, _, _, has_viewbox, viewbox = svg.get_intrinsic_dimensions()
            if has_viewbox is False:
                raise ValueError
            _, width, height = svg.get_intrinsic_size_in_pixels()
        except AttributeError:
            dimensions = svg.get_dimensions()
            height, width = dimensions.height, dimensions.width

        # SVGSurface takes the width first, then the height: passing them
        # the other way round transposes the canvas of non-square images.
        surface = cairo.SVGSurface(self.output_filename, width, height)
        context = cairo.Context(surface)
        try:
            svg.render_document(context, viewbox)
        except AttributeError:
            svg.render_cairo(context)

        surface.finish()
        return True

    def get_meta(self) -> Dict[str, Union[str, Dict]]:
        """Return exiftool's report, without the standard SVG namespace."""
        meta = super().get_meta()

        # The namespace is mandatory, but only the …/2000/svg is valid.
        ns = 'http://www.w3.org/2000/svg'
        if meta.get('Xmlns') == ns:
            meta.pop('Xmlns')
        return meta
|
||||
|
||||
class PNGParser(__ImageParser):
|
||||
|
||||
class PNGParser(exiftool.ExiftoolParser):
|
||||
mimetypes = {'image/png', }
|
||||
meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName',
|
||||
meta_allowlist = {'SourceFile', 'ExifToolVersion', 'FileName',
|
||||
'Directory', 'FileSize', 'FileModifyDate',
|
||||
'FileAccessDate', 'FileInodeChangeDate',
|
||||
'FilePermissions', 'FileType', 'FileTypeExtension',
|
||||
@@ -53,45 +67,85 @@ class PNGParser(__ImageParser):
|
||||
|
||||
def __init__(self, filename):
    """Refuse, with a ValueError, files that cairo can't load as PNG."""
    super().__init__(filename)

    try:  # better fail here than later
        cairo.ImageSurface.create_from_png(self.filename)
    except Exception:  # pragma: no cover
        # Cairo is returning some weird exceptions :/
        # Catching Exception rather than everything lets
        # KeyboardInterrupt and SystemExit through.
        raise ValueError
|
||||
|
||||
def remove_all(self) -> bool:
    """Write a cleaned copy of the image to `output_filename`.

    With `lightweight_cleaning` set, the work is delegated to
    `_lightweight_cleanup`; otherwise the image is loaded into a cairo
    surface and written back as PNG.
    """
    if not self.lightweight_cleaning:
        image = cairo.ImageSurface.create_from_png(self.filename)
        image.write_to_png(self.output_filename)
        return True
    return self._lightweight_cleanup()
|
||||
|
||||
|
||||
class GdkPixbufAbstractParser(__ImageParser):
|
||||
class GIFParser(exiftool.ExiftoolParser):
    """Parser for GIF images, always cleaned through exiftool."""
    mimetypes = {'image/gif'}
    meta_allowlist = {
        'AnimationIterations', 'BackgroundColor', 'BitsPerPixel',
        'ColorResolutionDepth', 'Directory', 'Duration',
        'ExifToolVersion', 'FileAccessDate', 'FileInodeChangeDate',
        'FileModifyDate', 'FileName', 'FilePermissions',
        'FileSize', 'FileType', 'FileTypeExtension',
        'FrameCount', 'GIFVersion', 'HasColorMap',
        'ImageHeight', 'ImageSize', 'ImageWidth',
        'MIMEType', 'Megapixels', 'SourceFile',
    }

    def remove_all(self) -> bool:
        """Delegate the cleaning to `_lightweight_cleanup`."""
        cleaned = self._lightweight_cleanup()
        return cleaned
|
||||
|
||||
|
||||
class GdkPixbufAbstractParser(exiftool.ExiftoolParser):
    """ GdkPixbuf can handle a lot of surfaces, so we're rendering images
        on it, this has the side-effect of completely removing metadata.
    """
    _type = ''

    def __init__(self, filename):
        super().__init__(filename)
        # Fail early on files that GdkPixbuf can't load.
        try:
            GdkPixbuf.Pixbuf.new_from_file(self.filename)
        except GLib.GError:
            raise ValueError

    def remove_all(self) -> bool:
        """Write a cleaned copy of the image to `output_filename`.

        With `lightweight_cleaning` set, the work is delegated to
        `_lightweight_cleanup`; otherwise the image is loaded, rotated
        according to its embedded orientation, and saved anew.

        Returns:
            False when GdkPixbuf fails to save the image, True otherwise.
        """
        if self.lightweight_cleaning:
            return self._lightweight_cleanup()

        suffix = os.path.splitext(self.filename)[1]
        image = GdkPixbuf.Pixbuf.new_from_file(self.filename)
        image = GdkPixbuf.Pixbuf.apply_embedded_orientation(image)
        # gdk is picky
        # NOTE(review): other extensions are passed on with their original
        # case; confirm that gdk accepts e.g. 'PNG' as a format name.
        gdk_names = {'.jpg': '.jpeg', '.tif': '.tiff'}
        suffix = gdk_names.get(suffix.lower(), suffix)
        try:
            image.savev(self.output_filename, type=suffix[1:],
                        option_keys=[], option_values=[])
        except GLib.GError:  # pragma: no cover
            return False
        return True
|
||||
|
||||
|
||||
class JPGParser(GdkPixbufAbstractParser):
    """Parser for JPEG images."""
    _type = 'jpeg'
    mimetypes = {'image/jpeg'}
    meta_allowlist = {
        'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',
        'FileSize', 'FileModifyDate', 'FileAccessDate',
        'FileInodeChangeDate', 'FilePermissions', 'FileType',
        'FileTypeExtension', 'MIMEType', 'ImageWidth', 'ImageSize',
        'BitsPerSample', 'ColorComponents', 'EncodingProcess',
        'JFIFVersion', 'ResolutionUnit', 'XResolution',
        'YCbCrSubSampling', 'YResolution', 'Megapixels', 'ImageHeight',
        'Orientation',
    }
|
||||
|
||||
|
||||
class TiffParser(GdkPixbufAbstractParser):
|
||||
_type = 'tiff'
|
||||
mimetypes = {'image/tiff'}
|
||||
meta_whitelist = {'Compression', 'ExifByteOrder', 'ExtraSamples',
|
||||
meta_allowlist = {'Compression', 'ExifByteOrder', 'ExtraSamples',
|
||||
'FillOrder', 'PhotometricInterpretation',
|
||||
'PlanarConfiguration', 'RowsPerStrip', 'SamplesPerPixel',
|
||||
'StripByteCounts', 'StripOffsets', 'BitsPerSample',
|
||||
@@ -99,19 +153,58 @@ class TiffParser(GdkPixbufAbstractParser):
|
||||
'FileInodeChangeDate', 'FileModifyDate', 'FileName',
|
||||
'FilePermissions', 'FileSize', 'FileType',
|
||||
'FileTypeExtension', 'ImageHeight', 'ImageSize',
|
||||
'ImageWidth', 'MIMEType', 'Megapixels', 'SourceFile'}
|
||||
'ImageWidth', 'MIMEType', 'Megapixels', 'SourceFile', 'Orientation'}
|
||||
|
||||
|
||||
class BMPParser(GdkPixbufAbstractParser):
|
||||
mimetypes = {'image/x-ms-bmp'}
|
||||
meta_whitelist = {'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',
|
||||
'FileSize', 'FileModifyDate', 'FileAccessDate',
|
||||
'FileInodeChangeDate', 'FilePermissions', 'FileType',
|
||||
'FileTypeExtension', 'MIMEType', 'BMPVersion',
|
||||
'ImageWidth', 'ImageHeight', 'Planes', 'BitDepth',
|
||||
'Compression', 'ImageLength', 'PixelsPerMeterX',
|
||||
'PixelsPerMeterY', 'NumColors', 'NumImportantColors',
|
||||
'RedMask', 'GreenMask', 'BlueMask', 'AlphaMask',
|
||||
'ColorSpace', 'RedEndpoint', 'GreenEndpoint',
|
||||
'BlueEndpoint', 'GammaRed', 'GammaGreen', 'GammaBlue',
|
||||
'ImageSize', 'Megapixels'}
|
||||
class PPMParser(abstract.AbstractParser):
    """Parser for text PPM images (mimetype `image/x-portable-pixmap`).

    The only metadata handled here are `#` comment lines.
    """
    mimetypes = {'image/x-portable-pixmap'}

    def get_meta(self) -> Dict[str, Union[str, Dict]]:
        """Return the comment lines of the file.

        The result maps the 0-based line index (as a string) to the
        stripped text of every line whose first non-blank character
        is `#`.
        """
        meta: Dict[str, Union[str, Dict[Any, Any]]] = dict()
        with open(self.filename) as f:
            for idx, line in enumerate(f):
                stripped = line.strip()
                if stripped.startswith('#'):
                    meta[str(idx)] = stripped
        return meta

    def remove_all(self) -> bool:
        """Write a copy of the file without its comment lines.

        Comment lines (first non-blank character is `#`) are dropped.
        On the remaining lines every run of whitespace is collapsed to a
        single space and the line is terminated by one newline, so tokens
        stay separated. Removing the whitespace altogether, as was done
        previously, glued neighbouring values together ("3 2" -> "32")
        and produced an unreadable image. Lines containing only
        whitespace are dropped.

        Always returns True.
        """
        with open(self.filename) as fin, open(self.output_filename, 'w') as fout:
            for line in fin:
                if line.lstrip().startswith('#'):
                    continue
                tokens = line.split()
                if tokens:
                    fout.write(' '.join(tokens) + '\n')
        return True
|
||||
|
||||
|
||||
class HEICParser(exiftool.ExiftoolParser):
    """Parser for HEIC images (mimetype `image/heic`)."""
    mimetypes = {'image/heic'}
    # NOTE(review): presumably the tag names considered harmless by the
    # exiftool-based base class -- confirm against ExiftoolParser.
    meta_allowlist = {
        # generic, file-level tags
        'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',
        'FileSize', 'FileModifyDate', 'FileAccessDate',
        'FileInodeChangeDate', 'FilePermissions', 'FileType',
        'FileTypeExtension', 'MIMEType',
        # container description
        'MajorBrand', 'MinorVersion', 'CompatibleBrands', 'HandlerType',
        'PrimaryItemReference',
        # HEVC configuration
        'HEVCConfigurationVersion', 'GeneralProfileSpace',
        'GeneralTierFlag', 'GeneralProfileIDC',
        'GenProfileCompatibilityFlags', 'ConstraintIndicatorFlags',
        'GeneralLevelIDC', 'MinSpatialSegmentationIDC',
        'ParallelismType', 'ChromaFormat', 'BitDepthLuma',
        'BitDepthChroma', 'NumTemporalLayers', 'TemporalIDNested',
        # image and media description
        'ImageWidth', 'ImageHeight', 'ImageSpatialExtent',
        'ImagePixelDepth', 'AverageFrameRate', 'ConstantFrameRate',
        'MediaDataSize', 'MediaDataOffset', 'ImageSize', 'Megapixels',
    }

    def remove_all(self) -> bool:
        """Delegate the cleaning to `self._lightweight_cleanup()`."""
        cleaned = self._lightweight_cleanup()
        return cleaned
|
||||
|
||||
class WEBPParser(GdkPixbufAbstractParser):
    """Parser for WebP images (mimetype `image/webp`)."""
    mimetypes = {'image/webp'}
    # NOTE(review): presumably the tag names the base class treats as
    # harmless when reporting metadata -- confirm against
    # GdkPixbufAbstractParser.
    meta_allowlist = {
        # generic, file-level tags
        'SourceFile', 'ExifToolVersion', 'FileName', 'Directory',
        'FileSize', 'FileModifyDate', 'FileAccessDate',
        'FileInodeChangeDate', 'FilePermissions', 'FileType',
        'FileTypeExtension', 'MIMEType',
        # image description
        'ImageWidth', 'ImageHeight', 'ImageSize', 'Megapixels',
        'BitsPerSample', 'ColorComponents', 'EncodingProcess',
        'JFIFVersion', 'ResolutionUnit', 'XResolution', 'YResolution',
        'YCbCrSubSampling', 'Orientation',
        # WebP / VP8 specific tags
        'HorizontalScale', 'VerticalScale', 'VP8Version',
    }
|
||||
|
@@ -1,138 +1,554 @@
|
||||
import random
|
||||
import uuid
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import tempfile
|
||||
import datetime
|
||||
import zipfile
|
||||
from typing import Dict, Set, Pattern
|
||||
from typing import Pattern, Any, Tuple, Dict
|
||||
|
||||
from . import abstract, parser_factory
|
||||
import xml.etree.ElementTree as ET # type: ignore
|
||||
|
||||
# Make pyflakes happy
|
||||
assert Set
|
||||
assert Pattern
|
||||
from .archive import ZipParser
|
||||
|
||||
class ArchiveBasedAbstractParser(abstract.AbstractParser):
|
||||
files_to_keep = set() # type: Set[str]
|
||||
files_to_omit = set() # type: Set[Pattern]
|
||||
|
||||
def __init__(self, filename):
|
||||
super().__init__(filename)
|
||||
try: # better fail here than later
|
||||
zipfile.ZipFile(self.filename)
|
||||
except zipfile.BadZipFile:
|
||||
raise ValueError
|
||||
|
||||
def _clean_zipinfo(self, zipinfo: zipfile.ZipInfo) -> zipfile.ZipInfo:
|
||||
zipinfo.create_system = 3 # Linux
|
||||
zipinfo.comment = b''
|
||||
zipinfo.date_time = (1980, 1, 1, 0, 0, 0)
|
||||
return zipinfo
|
||||
|
||||
def _get_zipinfo_meta(self, zipinfo: zipfile.ZipInfo) -> Dict[str, str]:
|
||||
metadata = {}
|
||||
if zipinfo.create_system == 3:
|
||||
#metadata['create_system'] = 'Linux'
|
||||
pass
|
||||
elif zipinfo.create_system == 2:
|
||||
metadata['create_system'] = 'Windows'
|
||||
else:
|
||||
metadata['create_system'] = 'Weird'
|
||||
|
||||
if zipinfo.comment:
|
||||
metadata['comment'] = zipinfo.comment # type: ignore
|
||||
|
||||
if zipinfo.date_time != (1980, 1, 1, 0, 0, 0):
|
||||
metadata['date_time'] = str(datetime.datetime(*zipinfo.date_time))
|
||||
|
||||
return metadata
|
||||
# pylint: disable=line-too-long
|
||||
|
||||
|
||||
def _clean_internal_file(self, item: zipfile.ZipInfo, temp_folder: str,
|
||||
zin: zipfile.ZipFile, zout: zipfile.ZipFile) -> bool:
|
||||
zin.extract(member=item, path=temp_folder)
|
||||
full_path = os.path.join(temp_folder, item.filename)
|
||||
tmp_parser, mtype = parser_factory.get_parser(full_path) # type: ignore
|
||||
if not tmp_parser:
|
||||
zout.close()
|
||||
os.remove(self.output_filename)
|
||||
print("%s's format (%s) isn't supported" % (item.filename, mtype))
|
||||
return False
|
||||
tmp_parser.remove_all()
|
||||
def _parse_xml(full_path: str) -> Tuple[ET.ElementTree, Dict[str, str]]:
|
||||
""" This function parses XML, with namespace support. """
|
||||
namespace_map = dict()
|
||||
for _, (key, value) in ET.iterparse(full_path, ("start-ns", )):
|
||||
# The ns[0-9]+ namespaces are reserved for internal usage, so
|
||||
# we have to use an other nomenclature.
|
||||
if re.match('^ns[0-9]+$', key, re.I): # pragma: no cover
|
||||
key = 'mat' + key[2:]
|
||||
|
||||
zinfo = zipfile.ZipInfo(item.filename) # type: ignore
|
||||
clean_zinfo = self._clean_zipinfo(zinfo)
|
||||
with open(tmp_parser.output_filename, 'rb') as f:
|
||||
zout.writestr(clean_zinfo, f.read())
|
||||
return True
|
||||
namespace_map[key] = value
|
||||
ET.register_namespace(key, value)
|
||||
|
||||
def remove_all(self) -> bool:
|
||||
zin = zipfile.ZipFile(self.filename, 'r')
|
||||
zout = zipfile.ZipFile(self.output_filename, 'w')
|
||||
temp_folder = tempfile.mkdtemp()
|
||||
|
||||
for item in zin.infolist():
|
||||
if item.filename[-1] == '/': # `is_dir` is added in Python3.6
|
||||
continue # don't keep empty folders
|
||||
elif item.filename in self.files_to_keep:
|
||||
item = self._clean_zipinfo(item)
|
||||
zout.writestr(item, zin.read(item))
|
||||
continue
|
||||
elif any(map(lambda r: r.search(item.filename), self.files_to_omit)):
|
||||
continue
|
||||
elif not self._clean_internal_file(item, temp_folder, zin, zout):
|
||||
return False
|
||||
|
||||
shutil.rmtree(temp_folder)
|
||||
zout.close()
|
||||
zin.close()
|
||||
return True
|
||||
return ET.parse(full_path), namespace_map
|
||||
|
||||
|
||||
class MSOfficeParser(ArchiveBasedAbstractParser):
|
||||
def _sort_xml_attributes(full_path: str) -> bool:
    """ Reorder the XML file at `full_path` in place.

    Different producers (MS Office, Libreoffice, …) emit their content in
    different orders, which makes it possible to fingerprint them. For
    every direct child of the root, its own children are sorted by
    `(tag, value of the 'desc' attribute)`, then the file is rewritten
    with an XML declaration, encoded as utf-8.

    A missing `desc` attribute is treated as an empty string: comparing
    `None` with a string raises a TypeError, which happened as soon as two
    siblings with the same tag didn't both carry the attribute.

    Raises `ET.ParseError` if the file isn't well-formed; returns True
    otherwise.
    """
    tree = ET.parse(full_path)

    for c in tree.getroot():
        c[:] = sorted(c, key=lambda child: (child.tag, child.get('desc', '')))

    tree.write(full_path, xml_declaration=True, encoding='utf-8')
    return True
|
||||
|
||||
|
||||
class MSOfficeParser(ZipParser):
|
||||
"""
|
||||
The methods modifying XML documents are usually doing so in two loops:
|
||||
1. finding the tag/attributes to remove;
|
||||
2. actually editing the document
|
||||
since it's tricky to modify the XML while iterating on it.
|
||||
"""
|
||||
mimetypes = {
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
|
||||
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
||||
'application/vnd.openxmlformats-officedocument.presentationml.presentation'
|
||||
}
|
||||
files_to_keep = {
|
||||
'[Content_Types].xml',
|
||||
'_rels/.rels',
|
||||
'word/_rels/document.xml.rels',
|
||||
'word/document.xml',
|
||||
'word/fontTable.xml',
|
||||
'word/settings.xml',
|
||||
'word/styles.xml',
|
||||
}
|
||||
files_to_omit = set(map(re.compile, { # type: ignore
|
||||
'^docProps/',
|
||||
}))
|
||||
content_types_to_keep = {
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.endnotes+xml', # /word/endnotes.xml
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.footnotes+xml', # /word/footnotes.xml
|
||||
'application/vnd.openxmlformats-officedocument.extended-properties+xml', # /docProps/app.xml
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml', # /word/document.xml
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml', # /word/fontTable.xml
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml', # /word/footer.xml
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml', # /word/header.xml
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml', # /word/styles.xml
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.numbering+xml', # /word/numbering.xml (used for bullet point formatting)
|
||||
'application/vnd.openxmlformats-officedocument.theme+xml', # /word/theme/theme[0-9].xml (used for font and background coloring, etc.)
|
||||
'application/vnd.openxmlformats-package.core-properties+xml', # /docProps/core.xml
|
||||
|
||||
def get_meta(self) -> Dict[str, str]:
|
||||
# for more complicated powerpoints
|
||||
'application/vnd.openxmlformats-officedocument.presentationml.notesSlide+xml',
|
||||
'application/vnd.openxmlformats-officedocument.presentationml.notesMaster+xml',
|
||||
'application/vnd.openxmlformats-officedocument.presentationml.handoutMaster+xml',
|
||||
'application/vnd.openxmlformats-officedocument.drawingml.diagramData+xml',
|
||||
'application/vnd.openxmlformats-officedocument.drawingml.diagramLayout+xml',
|
||||
'application/vnd.openxmlformats-officedocument.drawingml.diagramStyle+xml',
|
||||
'application/vnd.openxmlformats-officedocument.drawingml.diagramColors+xml',
|
||||
'application/vnd.ms-office.drawingml.diagramDrawing+xml',
|
||||
|
||||
# Do we want to keep the following ones?
|
||||
'application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml',
|
||||
}
|
||||
|
||||
def __init__(self, filename):
    """Set up the counters and the keep/omit patterns for `filename`.

    Raises ValueError when the keep-list can't be completed from the
    archive's `[Content_Types].xml`.
    """
    super().__init__(filename)

    # MSOffice documents are using various counters for cross-references,
    # we collect them all, to make sure that they're effectively counters,
    # and not unique id used for fingerprinting.
    self.__counters = dict(cNvPr=set(), rid=set())

    keep_patterns = (
        r'^\[Content_Types\]\.xml$',
        r'^_rels/\.rels$',
        r'^xl/sharedStrings\.xml$',  # https://docs.microsoft.com/en-us/office/open-xml/working-with-the-shared-string-table
        r'^xl/calcChain\.xml$',
        r'^(?:word|ppt|xl)/_rels/(document|workbook|presentation)\.xml\.rels$',
        r'^(?:word|ppt|xl)/_rels/footer[0-9]*\.xml\.rels$',
        r'^(?:word|ppt|xl)/_rels/header[0-9]*\.xml\.rels$',
        r'^(?:word|ppt|xl)/charts/_rels/chart[0-9]+\.xml\.rels$',
        r'^(?:word|ppt|xl)/charts/colors[0-9]+\.xml$',
        r'^(?:word|ppt|xl)/charts/style[0-9]+\.xml$',
        r'^(?:word|ppt|xl)/drawings/_rels/drawing[0-9]+\.xml\.rels$',
        r'^(?:word|ppt|xl)/styles\.xml$',
        # TODO: randomize axId ( https://docs.microsoft.com/en-us/openspecs/office_standards/ms-oi29500/089f849f-fcd6-4fa0-a281-35aa6a432a16 )
        r'^(?:word|ppt|xl)/charts/chart[0-9]*\.xml$',
        r'^xl/workbook\.xml$',
        r'^xl/worksheets/sheet[0-9]+\.xml$',
        r'^ppt/slideLayouts/_rels/slideLayout[0-9]+\.xml\.rels$',
        r'^ppt/slideLayouts/slideLayout[0-9]+\.xml$',
        r'^(?:word|ppt|xl)/tableStyles\.xml$',
        r'^(?:word|ppt|xl)/tables/table[0-9]+\.xml$',
        r'^ppt/slides/_rels/slide[0-9]*\.xml\.rels$',
        r'^ppt/slides/slide[0-9]*\.xml$',
        # https://msdn.microsoft.com/en-us/library/dd908153(v=office.12).aspx
        r'^(?:word|ppt|xl)/stylesWithEffects\.xml$',
        r'^ppt/presentation\.xml$',
        # TODO: check if p:bgRef can be randomized
        r'^ppt/slideMasters/slideMaster[0-9]+\.xml',
        r'^ppt/slideMasters/_rels/slideMaster[0-9]+\.xml\.rels',
        r'^xl/worksheets/_rels/sheet[0-9]+\.xml\.rels',
        r'^(?:word|ppt|xl)/drawings/vmlDrawing[0-9]+\.vml',
        r'^(?:word|ppt|xl)/drawings/drawing[0-9]+\.xml',
        r'^(?:word|ppt|xl)/embeddings/Microsoft_Excel_Worksheet[0-9]+\.xlsx',
        # rels for complicated powerpoints
        r'^ppt/notesSlides/_rels/notesSlide[0-9]+\.xml\.rels',
        r'^ppt/notesMasters/_rels/notesMaster[0-9]+\.xml\.rels',
        r'^ppt/handoutMasters/_rels/handoutMaster[0-9]+\.xml\.rels',
    )
    omit_patterns = (
        r'^\[trash\]/',
        r'^customXml/',
        r'webSettings\.xml$',
        r'^docProps/custom\.xml$',
        r'^(?:word|ppt|xl)/printerSettings/',
        r'^(?:word|ppt|xl)/theme',
        r'^(?:word|ppt|xl)/people\.xml$',
        r'^(?:word|ppt|xl)/persons/person\.xml$',
        r'^(?:word|ppt|xl)/numbering\.xml$',
        r'^(?:word|ppt|xl)/tags/',
        r'^(?:word|ppt|xl)/glossary/',
        # View properties like view mode, last viewed slide etc
        r'^(?:word|ppt|xl)/viewProps\.xml$',
        # Additional presentation-wide properties like printing properties,
        # presentation show properties etc.
        r'^(?:word|ppt|xl)/presProps\.xml$',
        r'^(?:word|ppt|xl)/comments[0-9]*\.xml$',
        r'^(?:word|ppt|xl)/threadedComments/threadedComment[0-9]*\.xml$',
        r'^(?:word|ppt|xl)/commentsExtended\.xml$',
        r'^(?:word|ppt|xl)/commentsExtensible\.xml$',
        r'^(?:word|ppt|xl)/commentsIds\.xml$',
        # we have an allowlist in self.files_to_keep,
        # so we can trash everything else
        r'^(?:word|ppt|xl)/_rels/',
        r'docMetadata/LabelInfo\.xml$',
    )

    self.files_to_keep = {re.compile(p) for p in keep_patterns}  # type: ignore
    self.files_to_omit = {re.compile(p) for p in omit_patterns}  # type: ignore

    filled = self.__fill_files_to_keep_via_content_types()
    if filled is False:
        raise ValueError
|
||||
|
||||
def __fill_files_to_keep_via_content_types(self) -> bool:
    """ There is a super-handy `[Content_Types].xml` file
    in MS Office archives, describing what each other file contains.
    The self.content_types_to_keep member contains a type allowlist,
    so we're using it to fill the self.files_to_keep one.

    Returns False when the archive has no `[Content_Types].xml` or when
    that file can't be parsed, True otherwise.
    """
    listing_name = '[Content_Types].xml'
    with zipfile.ZipFile(self.filename) as archive:
        if listing_name not in archive.namelist():
            return False
        raw_xml = archive.read(listing_name)

    self.content_types: Dict[str, str] = dict()
    try:
        root = ET.fromstring(raw_xml)
    except ET.ParseError:
        return False

    for node in root:
        part_name = node.attrib.get('PartName')
        content_type = node.attrib.get('ContentType')
        if part_name is None or content_type is None:  # pragma: no cover
            continue
        if content_type not in self.content_types_to_keep:
            continue
        # remove leading `/`, and match this exact name only
        exact_name = '^' + re.escape(part_name[1:]) + '$'
        self.files_to_keep.add(re.compile(exact_name))  # type: ignore
    return True
|
||||
|
||||
@staticmethod
def __remove_rsid(full_path: str) -> bool:
    """ Strip every "revision session ID" from the file, in place.

    The match is done on the '}rsid' substring rather than on exact names,
    since rsid can have multiple forms, like
    `rsidRDefault`, `rsidR`, `rsids`, …

    Returns False if the file can't be parsed, True otherwise.

    For more details, see
    - https://msdn.microsoft.com/en-us/library/office/documentformat.openxml.wordprocessing.previoussectionproperties.rsidrpr.aspx
    - https://blogs.msdn.microsoft.com/brian_jones/2006/12/11/whats-up-with-all-those-rsids/
    """
    try:
        tree, namespace = _parse_xml(full_path)
    except ET.ParseError as e:  # pragma: no cover
        logging.error("Unable to parse %s: %s", full_path, e)
        return False

    # rsid, tags or attributes, are always under the `w` namespace
    if 'w' not in namespace:
        return True

    parents = {child: parent for parent in tree.iter() for child in parent}

    rsid_tags = []
    for node in tree.iterfind('.//', namespace):
        if '}rsid' in node.tag.strip().lower():  # rsid as tag
            rsid_tags.append(node)
        else:  # rsid as attribute
            rsid_attributes = [k for k in node.attrib if '}rsid' in k.lower()]
            for attribute in rsid_attributes:
                del node.attrib[attribute]

    # the removal is done afterwards, to not modify the tree while walking it
    for node in rsid_tags:
        parents[node].remove(node)

    tree.write(full_path, xml_declaration=True, encoding='utf-8')
    return True
|
||||
|
||||
@staticmethod
def __remove_nsid(full_path: str) -> bool:
    """
    Remove every `w:nsid` element from the file, in place.

    nsid are random identifiers that can be used to ease the merging of
    some components of a document. They can also be used for
    fingerprinting.

    Returns False if the file can't be parsed, True otherwise.

    See the spec for more details: https://docs.microsoft.com/en-us/dotnet/api/documentformat.openxml.wordprocessing.nsid?view=openxml-2.8.1
    """
    try:
        tree, namespace = _parse_xml(full_path)
    except ET.ParseError as e:  # pragma: no cover
        logging.error("Unable to parse %s: %s", full_path, e)
        return False

    # The nsid tag is always under the `w` namespace
    if 'w' not in namespace:
        return True

    parents = {child: parent for parent in tree.iter() for child in parent}

    # materialise the matches first, to not modify the tree while walking it
    nsid_nodes = list(tree.iterfind('.//w:nsid', namespace))
    for node in nsid_nodes:
        parents[node].remove(node)

    tree.write(full_path, xml_declaration=True, encoding='utf-8')
    return True
|
||||
|
||||
@staticmethod
def __remove_revisions(full_path: str) -> bool:
    """ Drop the tracked revisions from the file, in place.

    `w:del` elements are removed with their content; the children of
    `w:ins` elements are re-attached to the parent of the `w:ins`, which
    is then removed.

    Returns False if the file can't be parsed, True otherwise. The file
    is left untouched when it doesn't declare the `w` namespace or
    doesn't contain any revision.
    """
    try:
        tree, namespace = _parse_xml(full_path)
    except ET.ParseError as e:  # pragma: no cover
        logging.error("Unable to parse %s: %s", full_path, e)
        return False

    # Revisions are always under the `w` namespace. Searching for a
    # prefix that isn't declared makes ElementTree raise a SyntaxError,
    # so bail out early, like the rsid/nsid removal do.
    if 'w' not in namespace:
        return True

    # Revisions are either deletions (`w:del`) or
    # insertions (`w:ins`)
    del_presence = tree.find('.//w:del', namespace)
    ins_presence = tree.find('.//w:ins', namespace)
    if del_presence is None and ins_presence is None:
        return True  # No revisions are present

    parent_map = {c: p for p in tree.iter() for c in p}

    # Collect first, remove afterwards: the tree must not be modified
    # while it is being walked.
    elements_del = list(tree.iterfind('.//w:del', namespace))
    for element in elements_del:
        parent_map[element].remove(element)

    elements_ins = list()
    for element in tree.iterfind('.//w:ins', namespace):
        # `position` is the rank of the `w:ins` in document order over the
        # whole tree, not its index in its parent.
        # NOTE(review): as an insert index it presumably appends when it
        # exceeds the number of children of the parent -- confirm.
        for position, item in enumerate(tree.iter()):  # pragma: no cover
            if item == element:
                for children in element.iterfind('./*'):
                    elements_ins.append((element, position, children))
                break

    for (element, position, children) in elements_ins:
        parent_map[element].insert(position, children)

    # the list can sometimes contain duplicate elements, so don't remove
    # until all children have been processed
    for (element, position, children) in elements_ins:
        if element in parent_map[element]:
            parent_map[element].remove(element)

    tree.write(full_path, xml_declaration=True, encoding='utf-8')
    return True
|
||||
|
||||
@staticmethod
def __remove_document_comment_meta(full_path: str) -> bool:
    """ Remove the comment anchors from the file, in place.

    The removed elements are `w:commentRangeStart`, `w:commentRangeEnd`
    and `w:commentReference`.

    Returns False if the file can't be parsed, True otherwise. The file
    is left untouched when it doesn't declare the `w` namespace or
    doesn't contain any of those elements.
    """
    try:
        tree, namespace = _parse_xml(full_path)
    except ET.ParseError as e:  # pragma: no cover
        logging.error("Unable to parse %s: %s", full_path, e)
        return False

    # The comment tags are always under the `w` namespace. Searching for
    # a prefix that isn't declared makes ElementTree raise a SyntaxError,
    # so bail out early, like the rsid/nsid removal do.
    if 'w' not in namespace:
        return True

    comment_tags = (
        './/w:commentRangeStart',
        './/w:commentRangeEnd',
        './/w:commentReference',
    )

    # Collect first, remove afterwards: the tree must not be modified
    # while it is being walked.
    elements_del = [
        element
        for tag in comment_tags
        for element in tree.iterfind(tag, namespace)
    ]
    if not elements_del:
        return True  # No comment meta tags are present

    parent_map = {c: p for p in tree.iter() for c in p}
    for element in elements_del:
        parent_map[element].remove(element)

    tree.write(full_path, xml_declaration=True, encoding='utf-8')
    return True
|
||||
|
||||
def __remove_document_xml_rels_members(self, full_path: str) -> bool:
    """ Remove the dangling references from the word/_rels/document.xml.rels file, since MS office doesn't like them.

    A `Relationship` is dropped when its target, prefixed with `word/`,
    is an archive member matched by self.files_to_omit and by none of the
    self.files_to_keep patterns.

    Returns False if the file can't be parsed, True otherwise.
    """
    try:
        tree, namespace = _parse_xml(full_path)
    except ET.ParseError as e:  # pragma: no cover
        logging.error("Unable to parse %s: %s", full_path, e)
        return False

    if len(namespace.items()) != 1:  # pragma: no cover
        logging.debug("Got several namespaces for Types: %s", namespace.items())

    def will_be_removed(member: str) -> bool:
        if not any(omit.search(member) for omit in self.files_to_omit):
            return False
        # the allowlist takes precedence over the omit list
        return not any(keep.search(member) for keep in self.files_to_keep)

    with zipfile.ZipFile(self.filename) as zin:
        removed_fnames = {member for member in zin.namelist() if will_be_removed(member)}

    root = tree.getroot()
    relationship_tag = '{%s}Relationship' % namespace['']
    for relationship in root.findall(relationship_tag):
        # add the word/ prefix to the path, since all document rels are in the word/ directory
        target = 'word/' + relationship.attrib['Target']
        if target in removed_fnames:
            root.remove(relationship)

    tree.write(full_path, xml_declaration=True, encoding='utf-8')
    return True
|
||||
|
||||
def __remove_content_type_members(self, full_path: str) -> bool:
    """ The method will remove the dangling references
    from the [Content_Types].xml file, since MS office doesn't like them

    An `Override` is dropped when its `PartName`, without the leading
    `/`, is an archive member matched by self.files_to_omit and by none
    of the self.files_to_keep patterns.

    Returns False if the file can't be parsed, True otherwise.
    """
    try:
        tree, namespace = _parse_xml(full_path)
    except ET.ParseError as e:  # pragma: no cover
        logging.error("Unable to parse %s: %s", full_path, e)
        return False

    if len(namespace.items()) != 1:  # pragma: no cover
        logging.debug("Got several namespaces for Types: %s", namespace.items())

    def will_be_removed(member: str) -> bool:
        if not any(omit.search(member) for omit in self.files_to_omit):
            return False
        # the allowlist takes precedence over the omit list
        return not any(keep.search(member) for keep in self.files_to_keep)

    with zipfile.ZipFile(self.filename) as zin:
        removed_fnames = {member for member in zin.namelist() if will_be_removed(member)}

    root = tree.getroot()
    override_tag = '{%s}Override' % namespace['']
    for override in root.findall(override_tag):
        part_name = override.attrib['PartName'][1:]  # remove the leading '/'
        if part_name in removed_fnames:
            root.remove(override)

    tree.write(full_path, xml_declaration=True, encoding='utf-8')
    return True
|
||||
|
||||
def _final_checks(self) -> bool:
    """ Warn if one of the collected counters doesn't look like a counter.

    A non-empty set of collected values is deemed invalid when its size
    differs from its largest value. Only the first invalid counter is
    reported. The method currently always returns True.
    """
    for name, values in self.__counters.items():
        if not values:
            continue
        if len(values) == max(values):
            continue
        # TODO: make this an error and return False
        # once the ability to correct the counters is implemented
        logging.warning("%s contains invalid %s: %s", self.filename, name, values)
        return True
    return True
|
||||
|
||||
def __collect_counters(self, full_path: str):
    """ Record, in self.__counters, the numeric ids found in the file.

    The file is read as utf-8 text and searched with regexps.
    """
    with open(full_path, encoding='utf-8') as f:
        content = f.read()

    # "relationship Id"
    relationship_ids = re.findall(r'(?:\s|r:)[iI][dD]="rId([0-9]+)"(?:\s|/)', content)
    self.__counters['rid'].update(map(int, relationship_ids))

    # "connector for Non-visual property"
    non_visual_ids = re.findall(r'<p:cNvPr id="([0-9]+)"', content)
    self.__counters['cNvPr'].update(map(int, non_visual_ids))
|
||||
|
||||
@staticmethod
def __randomize_creationId(full_path: str) -> bool:
    """ Replace the `val` of every `p14:creationId` by a random number.

    The file is rewritten in place, unless it doesn't declare the `p14`
    namespace. Returns False if the file can't be parsed, True otherwise.
    """
    try:
        tree, namespace = _parse_xml(full_path)
    except ET.ParseError as e:  # pragma: no cover
        logging.error("Unable to parse %s: %s", full_path, e)
        return False

    if 'p14' not in namespace:
        return True  # pragma: no cover

    for item in tree.iterfind('.//p14:creationId', namespace):
        # random.randint includes both bounds: the upper one has to be
        # 2**32 - 1 for the value to fit in an unsigned 32 bits integer.
        item.set('val', str(random.randint(0, 2**32 - 1)))
    tree.write(full_path, xml_declaration=True, encoding='utf-8')
    return True
|
||||
|
||||
@staticmethod
def __randomize_sldMasterId(full_path: str) -> bool:
    """ Replace the `id` of every `p:sldMasterId` by a random number.

    The file is rewritten in place, unless it doesn't declare the `p`
    namespace. Returns False if the file can't be parsed, True otherwise.
    """
    try:
        tree, namespace = _parse_xml(full_path)
    except ET.ParseError as e:  # pragma: no cover
        logging.error("Unable to parse %s: %s", full_path, e)
        return False

    if 'p' not in namespace:
        return True  # pragma: no cover

    for item in tree.iterfind('.//p:sldMasterId', namespace):
        # random.randint includes both bounds, so the upper one has to be
        # 2**32 - 1 to fit in an unsigned 32 bits integer.
        # NOTE(review): the lower bound follows the ST_SlideMasterId
        # schema type, which only allows values >= 2147483648 -- confirm
        # against ECMA-376.
        item.set('id', str(random.randint(2**31, 2**32 - 1)))
    tree.write(full_path, xml_declaration=True, encoding='utf-8')
    return True
|
||||
|
||||
def _specific_cleanup(self, full_path: str) -> bool:
    """ Clean the file at `full_path` in place.

    Empty files, and files whose name doesn't end with `.xml` or
    `.xml.rels`, are left untouched. The other ones go through, in order:
    the randomization of the creation ids, the collection of the
    counters, a treatment depending on the end of the path, the removal
    of the rsid and nsid, the sorting done by `_sort_xml_attributes`, and
    finally the removal of the first `mc:Ignorable` attribute.

    Returns False as soon as one of the steps fails, True otherwise.
    """
    # pylint: disable=too-many-return-statements,too-many-branches
    if os.stat(full_path).st_size == 0:  # Don't process empty files
        return True

    if not full_path.endswith(('.xml', '.xml.rels')):
        return True

    if self.__randomize_creationId(full_path) is False:
        return False

    self.__collect_counters(full_path)

    if full_path.endswith('/[Content_Types].xml'):
        # this file contains references to files that we might
        # remove, and MS Office doesn't like dangling references
        if self.__remove_content_type_members(full_path) is False:  # pragma: no cover
            return False
    elif full_path.endswith('/word/document.xml'):
        # this file contains the revisions
        if self.__remove_revisions(full_path) is False:
            return False  # pragma: no cover
        # remove comment references and ranges
        if self.__remove_document_comment_meta(full_path) is False:
            return False  # pragma: no cover
    elif full_path.endswith('/word/_rels/document.xml.rels'):
        # similar to the above, but for the document.xml.rels file
        if self.__remove_document_xml_rels_members(full_path) is False:  # pragma: no cover
            return False
    elif full_path.endswith('/docProps/app.xml'):
        # This file must be present and valid,
        # so we're removing as much as we can.
        with open(full_path, 'wb') as f:
            f.write(b'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>')
            f.write(b'<Properties xmlns="http://schemas.openxmlformats.org/officeDocument/2006/extended-properties">')
            f.write(b'</Properties>')
    elif full_path.endswith('/docProps/core.xml'):
        # This file must be present and valid,
        # so we're removing as much as we can.
        with open(full_path, 'wb') as f:
            f.write(b'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>')
            f.write(b'<cp:coreProperties xmlns:cp="http://schemas.openxmlformats.org/package/2006/metadata/core-properties">')
            f.write(b'</cp:coreProperties>')
    elif full_path.endswith('/ppt/tableStyles.xml'):  # pragma: no cover
        # This file must be present and valid,
        # so we're removing as much as we can.
        with open(full_path, 'wb') as f:
            f.write(b'<?xml version="1.0" encoding="UTF-8" standalone="yes"?>')
            # the default style is identified by a fresh random uuid
            uid = str(uuid.uuid4()).encode('utf-8')
            f.write(b'<a:tblStyleLst def="{%s}" xmlns:a="http://schemas.openxmlformats.org/drawingml/2006/main"/>' % uid)
    elif full_path.endswith('ppt/presentation.xml'):
        # unlike the other branches, this suffix has no leading `/`
        if self.__randomize_sldMasterId(full_path) is False:
            return False  # pragma: no cover

    # The steps below are applied to every file, including the ones
    # that were just rewritten from scratch above.
    if self.__remove_rsid(full_path) is False:
        return False  # pragma: no cover

    if self.__remove_nsid(full_path) is False:
        return False  # pragma: no cover

    try:
        _sort_xml_attributes(full_path)
    except ET.ParseError as e:  # pragma: no cover
        logging.error("Unable to parse %s: %s", full_path, e)
        return False

    # This is awful, I'm sorry.
    #
    # Microsoft Office isn't happy when we have the `mc:Ignorable`
    # tag containing namespaces that aren't present in the xml file,
    # so instead of trying to remove this specific tag with etree,
    # we're removing it, with a regexp.
    #
    # Since we're the ones producing this file, via the call to
    # _sort_xml_attributes, there won't be any "funny tricks".
    # Worst case, the tag isn't present, and everything is fine.
    #
    # see: https://docs.microsoft.com/en-us/dotnet/framework/wpf/advanced/mc-ignorable-attribute
    with open(full_path, 'rb') as f:
        text = f.read()
        # count=1: only the first occurrence is removed
        out = re.sub(b'mc:Ignorable="[^"]*"', b'', text, count=1)
    with open(full_path, 'wb') as f:
        f.write(out)

    return True
|
||||
|
||||
def _specific_get_meta(self, full_path: str, file_path: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Yes, I know that parsing xml with regexp ain't pretty,
|
||||
be my guest and fix it if you want.
|
||||
"""
|
||||
metadata = {}
|
||||
zipin = zipfile.ZipFile(self.filename)
|
||||
for item in zipin.infolist():
|
||||
if item.filename.startswith('docProps/') and item.filename.endswith('.xml'):
|
||||
content = zipin.read(item).decode('utf-8')
|
||||
try:
|
||||
results = re.findall(r"<(.+)>(.+)</\1>", content, re.I|re.M)
|
||||
for (key, value) in results:
|
||||
metadata[key] = value
|
||||
except TypeError: # We didn't manage to parse the xml file
|
||||
pass
|
||||
if not metadata: # better safe than sorry
|
||||
metadata[item] = 'harmful content'
|
||||
for key, value in self._get_zipinfo_meta(item).items():
|
||||
metadata[key] = value
|
||||
zipin.close()
|
||||
return metadata
|
||||
if not file_path.startswith('docProps/') or not file_path.endswith('.xml'):
|
||||
return {}
|
||||
|
||||
with open(full_path, encoding='utf-8') as f:
|
||||
try:
|
||||
results = re.findall(r"<(.+)>(.+)</\1>", f.read(), re.I | re.M)
|
||||
return {k: v for (k, v) in results}
|
||||
except (TypeError, UnicodeDecodeError):
|
||||
# We didn't manage to parse the xml file
|
||||
return {file_path: 'harmful content', }
|
||||
|
||||
|
||||
class LibreOfficeParser(ArchiveBasedAbstractParser):
|
||||
class LibreOfficeParser(ZipParser):
|
||||
mimetypes = {
|
||||
'application/vnd.oasis.opendocument.text',
|
||||
'application/vnd.oasis.opendocument.spreadsheet',
|
||||
@@ -142,39 +558,70 @@ class LibreOfficeParser(ArchiveBasedAbstractParser):
|
||||
'application/vnd.oasis.opendocument.formula',
|
||||
'application/vnd.oasis.opendocument.image',
|
||||
}
|
||||
files_to_keep = {
|
||||
'META-INF/manifest.xml',
|
||||
'content.xml',
|
||||
'manifest.rdf',
|
||||
'mimetype',
|
||||
'settings.xml',
|
||||
'styles.xml',
|
||||
}
|
||||
files_to_omit = set(map(re.compile, { # type: ignore
|
||||
'^meta\.xml$',
|
||||
'^Configurations2/',
|
||||
}))
|
||||
|
||||
def get_meta(self) -> Dict[str, str]:
|
||||
def __init__(self, filename):
    """Set up the keep/omit patterns used for `filename`."""
    super().__init__(filename)

    keep_patterns = (
        r'^META-INF/manifest\.xml$',
        r'^content\.xml$',
        r'^manifest\.rdf$',
        r'^mimetype$',
        r'^settings\.xml$',
        r'^styles\.xml$',
    )
    omit_patterns = (
        r'^meta\.xml$',
        r'^layout-cache$',
        r'^Configurations2/',
        r'^Thumbnails/',
    )

    self.files_to_keep = {re.compile(p) for p in keep_patterns}  # type: ignore
    self.files_to_omit = {re.compile(p) for p in omit_patterns}  # type: ignore
|
||||
|
||||
@staticmethod
def __remove_revisions(full_path: str) -> bool:
    """ Remove the `text:tracked-changes` elements found under the
    `office:text` ones, in place.

    Returns False if the file can't be parsed, True otherwise. The file
    is left untouched when it doesn't declare the `office` namespace.
    """
    try:
        tree, namespace = _parse_xml(full_path)
    except ET.ParseError as e:
        logging.error("Unable to parse %s: %s", full_path, e)
        return False

    if 'office' not in namespace:  # no revisions in the current file
        return True

    # Searching for a prefix that isn't declared makes ElementTree raise
    # a SyntaxError, and without the `text` namespace there can't be any
    # tracked changes anyway.
    if 'text' in namespace:
        parent_map = {c: p for p in tree.iter() for c in p}

        # Collect first, remove afterwards: the tree must not be modified
        # while it is being walked.
        tracked_changes = [
            changes
            for text in tree.getroot().iterfind('.//office:text', namespace)
            for changes in text.iterfind('.//text:tracked-changes', namespace)
        ]
        for changes in tracked_changes:
            # An element can only be removed from its direct parent, which
            # isn't necessarily the `office:text` it was found under.
            parent = parent_map[changes]
            if changes in parent:  # might have been collected twice
                parent.remove(changes)

    tree.write(full_path, xml_declaration=True, encoding='utf-8')
    return True
|
||||
|
||||
def _specific_cleanup(self, full_path: str) -> bool:
|
||||
if os.stat(full_path).st_size == 0: # Don't process empty files
|
||||
return True
|
||||
|
||||
if os.path.basename(full_path).endswith('.xml'):
|
||||
if os.path.basename(full_path) == 'content.xml':
|
||||
if self.__remove_revisions(full_path) is False:
|
||||
return False
|
||||
|
||||
try:
|
||||
_sort_xml_attributes(full_path)
|
||||
except ET.ParseError as e:
|
||||
logging.error("Unable to parse %s: %s", full_path, e)
|
||||
return False
|
||||
return True
|
||||
|
||||
def _specific_get_meta(self, full_path: str, file_path: str) -> Dict[str, Any]:
|
||||
"""
|
||||
Yes, I know that parsing xml with regexp ain't pretty,
|
||||
be my guest and fix it if you want.
|
||||
"""
|
||||
metadata = {}
|
||||
zipin = zipfile.ZipFile(self.filename)
|
||||
for item in zipin.infolist():
|
||||
if item.filename == 'meta.xml':
|
||||
content = zipin.read(item).decode('utf-8')
|
||||
try:
|
||||
results = re.findall(r"<((?:meta|dc|cp).+?)>(.+)</\1>", content, re.I|re.M)
|
||||
for (key, value) in results:
|
||||
metadata[key] = value
|
||||
except TypeError: # We didn't manage to parse the xml file
|
||||
pass
|
||||
if not metadata: # better safe than sorry
|
||||
metadata[item] = 'harmful content'
|
||||
for key, value in self._get_zipinfo_meta(item).items():
|
||||
metadata[key] = value
|
||||
zipin.close()
|
||||
return metadata
|
||||
|
||||
if file_path != 'meta.xml':
|
||||
return {}
|
||||
with open(full_path, encoding='utf-8') as f:
|
||||
try:
|
||||
results = re.findall(r"<((?:meta|dc|cp).+?)[^>]*>(.+)</\1>", f.read(), re.I|re.M)
|
||||
return {k:v for (k, v) in results}
|
||||
except (TypeError, UnicodeDecodeError): # We didn't manage to parse the xml file
|
||||
# We didn't manage to parse the xml file
|
||||
return {file_path: 'harmful content', }
|
||||
|
@@ -2,26 +2,38 @@ import glob
|
||||
import os
|
||||
import mimetypes
|
||||
import importlib
|
||||
from typing import TypeVar, List, Tuple, Optional
|
||||
from typing import TypeVar, Optional, List, Tuple
|
||||
|
||||
from . import abstract, unsupported_extensions
|
||||
|
||||
assert Tuple # make pyflakes happy
|
||||
from . import abstract, UNSUPPORTED_EXTENSIONS
|
||||
|
||||
T = TypeVar('T', bound='abstract.AbstractParser')
|
||||
|
||||
mimetypes.add_type('application/epub+zip', '.epub')
|
||||
mimetypes.add_type('application/x-dtbncx+xml', '.ncx') # EPUB Navigation Control XML File
|
||||
|
||||
# This should be removed after we move to python3.10
|
||||
# https://github.com/python/cpython/commit/20a5b7e986377bdfd929d7e8c4e3db5847dfdb2d
|
||||
mimetypes.add_type('image/heic', '.heic')
|
||||
|
||||
|
||||
def __load_all_parsers():
|
||||
""" Loads every parser in a dynamic way """
|
||||
current_dir = os.path.dirname(__file__)
|
||||
for name in glob.glob(os.path.join(current_dir, '*.py')):
|
||||
if name.endswith('abstract.py') or name.endswith('__init__.py'):
|
||||
for fname in glob.glob(os.path.join(current_dir, '*.py')):
|
||||
if fname.endswith('abstract.py'):
|
||||
continue
|
||||
basename = os.path.basename(name)
|
||||
elif fname.endswith('__init__.py'):
|
||||
continue
|
||||
elif fname.endswith('exiftool.py'):
|
||||
continue
|
||||
basename = os.path.basename(fname)
|
||||
name, _ = os.path.splitext(basename)
|
||||
importlib.import_module('.' + name, package='libmat2')
|
||||
|
||||
|
||||
__load_all_parsers()
|
||||
|
||||
|
||||
def _get_parsers() -> List[T]:
|
||||
""" Get all our parsers!"""
|
||||
def __get_parsers(cls):
|
||||
@@ -31,16 +43,22 @@ def _get_parsers() -> List[T]:
|
||||
|
||||
|
||||
def get_parser(filename: str) -> Tuple[Optional[T], Optional[str]]:
|
||||
""" Return the appropriate parser for a given filename.
|
||||
|
||||
:raises ValueError: Raised if the instantiation of the parser went wrong.
|
||||
"""
|
||||
mtype, _ = mimetypes.guess_type(filename)
|
||||
|
||||
_, extension = os.path.splitext(filename)
|
||||
if extension in unsupported_extensions:
|
||||
if extension.lower() in UNSUPPORTED_EXTENSIONS:
|
||||
return None, mtype
|
||||
|
||||
for c in _get_parsers(): # type: ignore
|
||||
if mtype in c.mimetypes:
|
||||
try:
|
||||
return c(filename), mtype
|
||||
except ValueError:
|
||||
return None, mtype
|
||||
if mtype == 'application/x-tar':
|
||||
if extension[1:] in ('bz2', 'gz', 'xz'):
|
||||
mtype = mtype + '+' + extension[1:]
|
||||
|
||||
for parser_class in _get_parsers(): # type: ignore
|
||||
if mtype in parser_class.mimetypes:
|
||||
# This instantiation might raise a ValueError on malformed files
|
||||
return parser_class(filename), mtype
|
||||
return None, mtype
|
||||
|
@@ -7,6 +7,7 @@ import re
|
||||
import logging
|
||||
import tempfile
|
||||
import io
|
||||
from typing import Union, Dict
|
||||
|
||||
import cairo
|
||||
import gi
|
||||
@@ -15,7 +16,7 @@ from gi.repository import Poppler, GLib
|
||||
|
||||
from . import abstract
|
||||
|
||||
logging.basicConfig(level=logging.DEBUG)
|
||||
FIXED_PDF_VERSION = cairo.PDFVersion.VERSION_1_5
|
||||
|
||||
|
||||
class PDFParser(abstract.AbstractParser):
|
||||
@@ -27,13 +28,21 @@ class PDFParser(abstract.AbstractParser):
|
||||
def __init__(self, filename):
|
||||
super().__init__(filename)
|
||||
self.uri = 'file://' + os.path.abspath(self.filename)
|
||||
self.__scale = 2 # how much precision do we want for the render
|
||||
self.__scale = 200 / 72.0 # how much precision do we want for the render
|
||||
try: # Check now that the file is valid, to avoid surprises later
|
||||
Poppler.Document.new_from_file(self.uri, None)
|
||||
except GLib.GError: # Invalid PDF
|
||||
raise ValueError
|
||||
|
||||
def remove_all_lightweight(self):
|
||||
def remove_all(self) -> bool:
|
||||
if self.lightweight_cleaning is True:
|
||||
try:
|
||||
return self.__remove_all_lightweight()
|
||||
except (cairo.Error, MemoryError) as e:
|
||||
raise RuntimeError(e)
|
||||
return self.__remove_all_thorough()
|
||||
|
||||
def __remove_all_lightweight(self) -> bool:
|
||||
"""
|
||||
Load the document into Poppler, render pages on a new PDFSurface.
|
||||
"""
|
||||
@@ -41,7 +50,8 @@ class PDFParser(abstract.AbstractParser):
|
||||
pages_count = document.get_n_pages()
|
||||
|
||||
tmp_path = tempfile.mkstemp()[1]
|
||||
pdf_surface = cairo.PDFSurface(tmp_path, 10, 10)
|
||||
pdf_surface = cairo.PDFSurface(tmp_path, 10, 10) # resized later anyway
|
||||
pdf_surface.restrict_to_version(FIXED_PDF_VERSION)
|
||||
pdf_context = cairo.Context(pdf_surface) # context draws on the surface
|
||||
|
||||
for pagenum in range(pages_count):
|
||||
@@ -60,7 +70,7 @@ class PDFParser(abstract.AbstractParser):
|
||||
|
||||
return True
|
||||
|
||||
def remove_all(self):
|
||||
def __remove_all_thorough(self) -> bool:
|
||||
"""
|
||||
Load the document into Poppler, render pages on PNG,
|
||||
and shove those PNG into a new PDF.
|
||||
@@ -70,14 +80,20 @@ class PDFParser(abstract.AbstractParser):
|
||||
|
||||
_, tmp_path = tempfile.mkstemp()
|
||||
pdf_surface = cairo.PDFSurface(tmp_path, 32, 32) # resized later anyway
|
||||
pdf_surface.restrict_to_version(FIXED_PDF_VERSION)
|
||||
pdf_context = cairo.Context(pdf_surface)
|
||||
|
||||
for pagenum in range(pages_count):
|
||||
page = document.get_page(pagenum)
|
||||
if page is None: # pragma: no cover
|
||||
logging.error("Unable to get PDF pages")
|
||||
return False
|
||||
page_width, page_height = page.get_size()
|
||||
logging.info("Rendering page %d/%d", pagenum + 1, pages_count)
|
||||
|
||||
img_surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, int(page_width) * self.__scale, int(page_height) * self.__scale)
|
||||
width = int(page_width * self.__scale)
|
||||
height = int(page_height * self.__scale)
|
||||
img_surface = cairo.ImageSurface(cairo.FORMAT_ARGB32, width, height)
|
||||
img_context = cairo.Context(img_surface)
|
||||
|
||||
img_context.scale(self.__scale, self.__scale)
|
||||
@@ -90,10 +106,14 @@ class PDFParser(abstract.AbstractParser):
|
||||
buf.seek(0)
|
||||
|
||||
img = cairo.ImageSurface.create_from_png(buf)
|
||||
pdf_surface.set_size(page_width*self.__scale, page_height*self.__scale)
|
||||
if cairo.version_info < (1, 12, 0):
|
||||
pdf_surface.set_size(width, height)
|
||||
else:
|
||||
pdf_surface.set_size(page_width, page_height)
|
||||
pdf_surface.set_device_scale(1 / self.__scale, 1 / self.__scale)
|
||||
pdf_context.set_source_surface(img, 0, 0)
|
||||
pdf_context.paint()
|
||||
pdf_context.show_page()
|
||||
pdf_context.show_page() # draw pdf_context on pdf_surface
|
||||
|
||||
pdf_surface.finish()
|
||||
|
||||
@@ -110,17 +130,27 @@ class PDFParser(abstract.AbstractParser):
|
||||
document.set_creator('')
|
||||
document.set_creation_date(-1)
|
||||
document.save('file://' + os.path.abspath(out_file))
|
||||
|
||||
# Cairo adds "/Producer" and "/CreationDate", and Poppler sometimes
|
||||
# fails to remove them, we have to use this terrible regex.
|
||||
# It should(tm) be alright though, because cairo's output format
|
||||
# for metadata is fixed.
|
||||
with open(out_file, 'rb') as f:
|
||||
out = re.sub(rb'<<[\s\n]*/Producer.*?>>', b' << >>', f.read(),
|
||||
count=0, flags=re.DOTALL | re.IGNORECASE)
|
||||
with open(out_file, 'wb') as f:
|
||||
f.write(out)
|
||||
|
||||
return True
|
||||
|
||||
|
||||
@staticmethod
|
||||
def __parse_metadata_field(data: str) -> dict:
|
||||
def __parse_metadata_field(data: str) -> Dict[str, str]:
|
||||
metadata = {}
|
||||
for (_, key, value) in re.findall(r"<(xmp|pdfx|pdf|xmpMM):(.+)>(.+)</\1:\2>", data, re.I):
|
||||
metadata[key] = value
|
||||
return metadata
|
||||
|
||||
def get_meta(self):
|
||||
def get_meta(self) -> Dict[str, Union[str, Dict]]:
|
||||
""" Return a dict with all the meta of the file
|
||||
"""
|
||||
metadata = {}
|
||||
|
@@ -1,43 +1,42 @@
|
||||
import logging
|
||||
from typing import Union, Tuple, Dict
|
||||
from typing import Union, Dict, List, Tuple
|
||||
|
||||
from . import abstract
|
||||
|
||||
|
||||
class TorrentParser(abstract.AbstractParser):
|
||||
mimetypes = {'application/x-bittorrent', }
|
||||
whitelist = {b'announce', b'announce-list', b'info'}
|
||||
allowlist = {b'announce', b'announce-list', b'info'}
|
||||
|
||||
def get_meta(self) -> Dict[str, str]:
|
||||
metadata = {}
|
||||
def __init__(self, filename):
|
||||
super().__init__(filename)
|
||||
with open(self.filename, 'rb') as f:
|
||||
d = _BencodeHandler().bdecode(f.read())
|
||||
if d is None:
|
||||
return {'Unknown meta': 'Unable to parse torrent file "%s".' % self.filename}
|
||||
for k, v in d.items():
|
||||
if k not in self.whitelist:
|
||||
metadata[k.decode('utf-8')] = v
|
||||
return metadata
|
||||
self.dict_repr = _BencodeHandler().bdecode(f.read())
|
||||
if self.dict_repr is None:
|
||||
raise ValueError
|
||||
|
||||
def get_meta(self) -> Dict[str, Union[str, Dict]]:
|
||||
metadata = {}
|
||||
for key, value in self.dict_repr.items():
|
||||
if key not in self.allowlist:
|
||||
metadata[key.decode('utf-8')] = value
|
||||
return metadata
|
||||
|
||||
def remove_all(self) -> bool:
|
||||
cleaned = dict()
|
||||
with open(self.filename, 'rb') as f:
|
||||
d = _BencodeHandler().bdecode(f.read())
|
||||
if d is None:
|
||||
return False
|
||||
for k, v in d.items():
|
||||
if k in self.whitelist:
|
||||
cleaned[k] = v
|
||||
for key, value in self.dict_repr.items():
|
||||
if key in self.allowlist:
|
||||
cleaned[key] = value
|
||||
with open(self.output_filename, 'wb') as f:
|
||||
f.write(_BencodeHandler().bencode(cleaned))
|
||||
self.dict_repr = cleaned # since we're stateful
|
||||
return True
|
||||
|
||||
|
||||
class _BencodeHandler(object):
|
||||
class _BencodeHandler:
|
||||
"""
|
||||
Since bencode isn't that hard to parse,
|
||||
MAT2 comes with its own parser, based on the spec
|
||||
mat2 comes with its own parser, based on the spec
|
||||
https://wiki.theory.org/index.php/BitTorrentSpecification#Bencoding
|
||||
"""
|
||||
def __init__(self):
|
||||
@@ -60,8 +59,6 @@ class _BencodeHandler(object):
|
||||
def __decode_int(s: bytes) -> Tuple[int, bytes]:
|
||||
s = s[1:]
|
||||
next_idx = s.index(b'e')
|
||||
if next_idx is None:
|
||||
raise ValueError # missing suffix
|
||||
if s.startswith(b'-0'):
|
||||
raise ValueError # negative zero doesn't exist
|
||||
elif s.startswith(b'0') and next_idx != 1:
|
||||
@@ -70,32 +67,30 @@ class _BencodeHandler(object):
|
||||
|
||||
@staticmethod
|
||||
def __decode_string(s: bytes) -> Tuple[bytes, bytes]:
|
||||
sep = s.index(b':')
|
||||
if set is None:
|
||||
raise ValueError # missing suffix
|
||||
str_len = int(s[:sep])
|
||||
if str_len < 0:
|
||||
raise ValueError
|
||||
elif s[0] == b'0' and sep != 1:
|
||||
colon = s.index(b':')
|
||||
# FIXME Python3 is broken here, the call to `ord` shouldn't be needed,
|
||||
# but apparently it is. This is utterly idiotic.
|
||||
if (s[0] == ord('0') or s[0] == '0') and colon != 1:
|
||||
raise ValueError
|
||||
str_len = int(s[:colon])
|
||||
s = s[1:]
|
||||
return s[sep:sep+str_len], s[sep+str_len:]
|
||||
return s[colon:colon+str_len], s[colon+str_len:]
|
||||
|
||||
def __decode_list(self, s: bytes) -> Tuple[list, bytes]:
|
||||
r = list()
|
||||
def __decode_list(self, s: bytes) -> Tuple[List, bytes]:
|
||||
ret = list()
|
||||
s = s[1:] # skip leading `l`
|
||||
while s[0] != ord('e'):
|
||||
v, s = self.__decode_func[s[0]](s)
|
||||
r.append(v)
|
||||
return r, s[1:]
|
||||
value, s = self.__decode_func[s[0]](s)
|
||||
ret.append(value)
|
||||
return ret, s[1:]
|
||||
|
||||
def __decode_dict(self, s: bytes) -> Tuple[dict, bytes]:
|
||||
r = dict()
|
||||
def __decode_dict(self, s: bytes) -> Tuple[Dict, bytes]:
|
||||
ret = dict()
|
||||
s = s[1:] # skip leading `d`
|
||||
while s[0] != ord(b'e'):
|
||||
k, s = self.__decode_string(s)
|
||||
r[k], s = self.__decode_func[s[0]](s)
|
||||
return r, s[1:]
|
||||
key, s = self.__decode_string(s)
|
||||
ret[key], s = self.__decode_func[s[0]](s)
|
||||
return ret, s[1:]
|
||||
|
||||
@staticmethod
|
||||
def __encode_int(x: bytes) -> bytes:
|
||||
@@ -113,21 +108,21 @@ class _BencodeHandler(object):
|
||||
|
||||
def __encode_dict(self, x: dict) -> bytes:
|
||||
ret = b''
|
||||
for k, v in sorted(x.items()):
|
||||
ret += self.__encode_func[type(k)](k)
|
||||
ret += self.__encode_func[type(v)](v)
|
||||
for key, value in sorted(x.items()):
|
||||
ret += self.__encode_func[type(key)](key)
|
||||
ret += self.__encode_func[type(value)](value)
|
||||
return b'd' + ret + b'e'
|
||||
|
||||
def bencode(self, s: Union[dict, list, bytes, int]) -> bytes:
|
||||
def bencode(self, s: Union[Dict, List, bytes, int]) -> bytes:
|
||||
return self.__encode_func[type(s)](s)
|
||||
|
||||
def bdecode(self, s: bytes) -> Union[dict, None]:
|
||||
def bdecode(self, s: bytes) -> Union[Dict, None]:
|
||||
try:
|
||||
r, l = self.__decode_func[s[0]](s)
|
||||
ret, trail = self.__decode_func[s[0]](s)
|
||||
except (IndexError, KeyError, ValueError) as e:
|
||||
logging.debug("Not a valid bencoded string: %s" % e)
|
||||
logging.warning("Not a valid bencoded string: %s", e)
|
||||
return None
|
||||
if l != b'':
|
||||
logging.debug("Invalid bencoded value (data after valid prefix)")
|
||||
if trail != b'':
|
||||
logging.warning("Invalid bencoded value (data after valid prefix)")
|
||||
return None
|
||||
return r
|
||||
return ret
|
||||
|
144
libmat2/video.py
Normal file
@@ -0,0 +1,144 @@
|
||||
import subprocess
|
||||
import functools
|
||||
import shutil
|
||||
import logging
|
||||
|
||||
from typing import Union, Dict
|
||||
|
||||
from . import exiftool
|
||||
from . import bubblewrap
|
||||
|
||||
|
||||
class AbstractFFmpegParser(exiftool.ExiftoolParser):
    """ Base class for the parsers that clean files by remuxing them
    through ffmpeg (mostly video containers).

    Subclasses may fill `meta_key_value_allowlist` with metadata fields
    that are allowed to remain as long as they hold the listed value.
    """
    # Some fileformats have mandatory metadata fields
    meta_key_value_allowlist: Dict[str, Union[str, int]] = dict()

    def remove_all(self) -> bool:
        """ Run ffmpeg on `self.filename`, writing to `self.output_filename`.

        :return: True when ffmpeg exited successfully, False otherwise.
        """
        if self.meta_key_value_allowlist:
            logging.warning('The format of "%s" (%s) has some mandatory '
                            'metadata fields; mat2 filled them with standard '
                            'data.', self.filename, ', '.join(self.mimetypes))

        ffmpeg = _get_ffmpeg_path()
        io_options = [
            '-i', self.filename,  # input file
            '-y',  # overwrite existing output file
            '-map', '0',  # copy all the streams from input to output
            '-codec', 'copy',  # don't decode anything, just copy (speed!)
        ]
        verbosity_options = [
            '-loglevel', 'panic',  # don't show log
            '-hide_banner',  # hide the banner
        ]
        stripping_options = [
            '-map_metadata', '-1',  # remove superficial metadata
            '-map_chapters', '-1',  # remove chapters
            '-disposition', '0',  # remove dispositions (check ffmpeg's manpage)
            '-fflags', '+bitexact',  # don't add any metadata
            '-flags:v', '+bitexact',  # don't add any metadata
            '-flags:a', '+bitexact',  # don't add any metadata
        ]
        cmd = ([ffmpeg] + io_options + verbosity_options + stripping_options
               + [self.output_filename])

        try:
            if not self.sandbox:
                subprocess.run(cmd, check=True)
            else:
                bubblewrap.run(cmd, check=True,
                               input_filename=self.filename,
                               output_filename=self.output_filename)
        except subprocess.CalledProcessError as e:
            logging.error("Something went wrong during the processing of %s: %s", self.filename, e)
            return False
        return True

    def get_meta(self) -> Dict[str, Union[str, Dict]]:
        """ Return the parent's metadata, minus the entries of
        `meta_key_value_allowlist` that hold exactly their allowed value.
        """
        allowed = self.meta_key_value_allowlist
        ret: Dict[str, Union[str, Dict]] = {
            key: value
            for key, value in super().get_meta().items()
            if not (key in allowed and value == allowed[key])
        }
        return ret
|
||||
|
||||
|
||||
class WMVParser(AbstractFFmpegParser):
    """ FFmpeg-based parser for `video/x-ms-wmv` files. """
    mimetypes = {'video/x-ms-wmv', }

    # Field names that are not reported as harmful metadata.
    meta_allowlist = {
        'AudioChannels', 'AudioCodecID', 'AudioCodecName',
        'ErrorCorrectionType', 'AudioSampleRate', 'DataPackets',
        'Directory', 'Duration', 'ExifToolVersion', 'FileAccessDate',
        'FileInodeChangeDate', 'FileLength', 'FileModifyDate', 'FileName',
        'FilePermissions', 'FileSize', 'FileType', 'FileTypeExtension',
        'FrameCount', 'FrameRate', 'ImageHeight', 'ImageSize', 'ImageWidth',
        'MIMEType', 'MaxBitrate', 'MaxPacketSize', 'Megapixels',
        'MinPacketSize', 'Preroll', 'SendDuration', 'SourceFile',
        'StreamNumber', 'VideoCodecName',
    }

    # Some metadata are mandatory :/
    # These fields are tolerated only when they hold exactly this value.
    meta_key_value_allowlist = {
        'AudioCodecDescription': '',
        'CreationDate': '0000:00:00 00:00:00Z',
        'FileID': '00000000-0000-0000-0000-000000000000',
        'Flags': 2,  # FIXME: What is this? Why 2?
        'ModifyDate': '0000:00:00 00:00:00',
        'TimeOffset': '0 s',
        'VideoCodecDescription': '',
        'StreamType': 'Audio',
    }
|
||||
|
||||
|
||||
class AVIParser(AbstractFFmpegParser):
    """ FFmpeg-based parser for `video/x-msvideo` files. """
    mimetypes = {'video/x-msvideo', }

    # Field names that are not reported as harmful metadata.
    meta_allowlist = {
        'SourceFile', 'ExifToolVersion', 'FileName', 'Directory', 'FileSize',
        'FileModifyDate', 'FileAccessDate', 'FileInodeChangeDate',
        'FilePermissions', 'FileType', 'FileTypeExtension', 'MIMEType',
        'FrameRate', 'MaxDataRate', 'FrameCount', 'StreamCount',
        'StreamType', 'VideoCodec', 'VideoFrameRate', 'VideoFrameCount',
        'Quality', 'SampleSize', 'BMPVersion', 'ImageWidth', 'ImageHeight',
        'Planes', 'BitDepth', 'Compression', 'ImageLength',
        'PixelsPerMeterX', 'PixelsPerMeterY', 'NumImportantColors',
        'NumColors', 'RedMask', 'GreenMask', 'BlueMask', 'AlphaMask',
        'ColorSpace', 'AudioCodec', 'AudioCodecRate', 'AudioSampleCount',
        'AudioSampleRate', 'Encoding', 'NumChannels', 'SampleRate',
        'AvgBytesPerSec', 'BitsPerSample', 'Duration', 'ImageSize',
        'Megapixels',
    }
|
||||
|
||||
|
||||
class MP4Parser(AbstractFFmpegParser):
    """ FFmpeg-based parser for `video/mp4` files. """
    mimetypes = {'video/mp4', }

    # Field names that are not reported as harmful metadata.
    meta_allowlist = {
        'AudioFormat', 'AvgBitrate', 'Balance', 'TrackDuration',
        'XResolution', 'YResolution', 'ExifToolVersion', 'FileAccessDate',
        'FileInodeChangeDate', 'FileModifyDate', 'FileName',
        'FilePermissions', 'MIMEType', 'FileType', 'FileTypeExtension',
        'Directory', 'ImageWidth', 'ImageSize', 'ImageHeight', 'FileSize',
        'SourceFile', 'BitDepth', 'Duration', 'AudioChannels',
        'AudioBitsPerSample', 'AudioSampleRate', 'Megapixels',
        'MovieDataSize', 'VideoFrameRate', 'MediaTimeScale',
        'SourceImageHeight', 'SourceImageWidth', 'MatrixStructure',
        'MediaDuration',
    }

    # Some metadata are mandatory :/
    # These fields are tolerated only when they hold exactly this value.
    meta_key_value_allowlist = {
        'CreateDate': '0000:00:00 00:00:00',
        'CurrentTime': '0 s',
        'MediaCreateDate': '0000:00:00 00:00:00',
        'MediaLanguageCode': 'und',
        'MediaModifyDate': '0000:00:00 00:00:00',
        'ModifyDate': '0000:00:00 00:00:00',
        'OpColor': '0 0 0',
        'PosterTime': '0 s',
        'PreferredRate': '1',
        'PreferredVolume': '100.00%',
        'PreviewDuration': '0 s',
        'PreviewTime': '0 s',
        'SelectionDuration': '0 s',
        'SelectionTime': '0 s',
        'TrackCreateDate': '0000:00:00 00:00:00',
        'TrackModifyDate': '0000:00:00 00:00:00',
        'TrackVolume': '0.00%',
    }
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=None)
def _get_ffmpeg_path() -> str:  # pragma: no cover
    """ Return the path of the `ffmpeg` executable found via `shutil.which`.

    The result is memoised, so the lookup happens at most once per
    successful call.

    :raises RuntimeError: if no `ffmpeg` executable could be found.
    """
    located = shutil.which('ffmpeg')
    if not located:
        raise RuntimeError("Unable to find ffmpeg")
    return located
|
192
libmat2/web.py
Normal file
@@ -0,0 +1,192 @@
|
||||
from html import parser, escape
|
||||
from typing import Any, Optional, Dict, List, Tuple, Set
|
||||
import re
|
||||
import string
|
||||
|
||||
from . import abstract
|
||||
|
||||
|
||||
# pylint: disable=too-many-instance-attributes
|
||||
|
||||
class CSSParser(abstract.AbstractParser):
    """CSS files have no metadata as such, only comments of the form
    `/* … */`; those comments are what this parser reports and removes."""
    mimetypes = {'text/css', }
    flags = re.MULTILINE | re.DOTALL

    def _read_content(self) -> str:
        """Return the whole input file decoded as UTF-8.

        :raises ValueError: if the file is not valid UTF-8.
        """
        with open(self.filename, encoding='utf-8') as f:
            try:
                return f.read()
            except UnicodeDecodeError:  # pragma: no cover
                raise ValueError

    def remove_all(self) -> bool:
        """Write a copy of the file, stripped of its comments, to
        `self.output_filename`."""
        stripped = re.sub(r'/\*.*?\*/', '', self._read_content(),
                          count=0, flags=self.flags)
        with open(self.output_filename, 'w', encoding='utf-8') as f:
            f.write(stripped)
        return True

    def get_meta(self) -> Dict[str, Any]:
        """Return what was found in the comments of the file.

        A comment line holding exactly one `:` is stored as a key/value
        pair; any other line is stored under the 'harmful data' key, each
        such line overwriting the previous one.
        """
        metadata = {}
        comments = re.findall(r'/\*(.*?)\*/', self._read_content(), self.flags)
        for comment in comments:
            for line in comment.splitlines():
                fields = line.split(':')
                if len(fields) == 2:
                    name, value = fields
                    metadata[name.strip(string.whitespace + '*')] = value.strip()
                else:
                    metadata['harmful data'] = line.strip()
        return metadata
|
||||
|
||||
|
||||
class AbstractHTMLParser(abstract.AbstractParser):
    """Base class for the HTML-like parsers: the file is fed to a
    `_HTMLParser` at construction time, and both `get_meta` and
    `remove_all` are delegated to it."""
    # Tags handed to `_HTMLParser` as its blocklist.
    tags_blocklist: Set[str] = set()
    # In some html/xml-based formats some tags are mandatory,
    # so we're keeping them, but are discarding their content
    tags_required_blocklist: Set[str] = set()

    def __init__(self, filename):
        super().__init__(filename)
        self.__parser = _HTMLParser(
            self.filename,
            self.tags_blocklist,
            self.tags_required_blocklist,
        )
        html_parser = self.__parser
        with open(filename, encoding='utf-8') as f:
            html_parser.feed(f.read())
        html_parser.close()

    def get_meta(self) -> Dict[str, Any]:
        """Return the metadata collected by the underlying parser."""
        return self.__parser.get_meta()

    def remove_all(self) -> bool:
        """Have the underlying parser write its cleaned output to
        `self.output_filename`."""
        return self.__parser.remove_all(self.output_filename)
|
||||
|
||||
|
||||
class HTMLParser(AbstractHTMLParser):
    """Parser for HTML and XHTML documents."""
    mimetypes = {'text/html', 'application/xhtml+xml'}
    # `meta` is in the blocklist, `title` in the required blocklist.
    tags_blocklist = {'meta', }
    tags_required_blocklist = {'title', }
|
||||
|
||||
|
||||
class DTBNCXParser(AbstractHTMLParser):
    """Parser for `application/x-dtbncx+xml` files."""
    mimetypes = {'application/x-dtbncx+xml', }
    # Only a required blocklist here; the plain blocklist stays empty.
    tags_required_blocklist = {'title', 'doctitle', 'meta'}
|
||||
|
||||
|
||||
class _HTMLParser(parser.HTMLParser):
    """Python doesn't have a validating html parser in its stdlib, so
    we're using an internal stack to track all the opening/closing tags,
    and hoping for the best.

    Moreover, the parser.HTMLParser call doesn't provide a get_endtag_text
    method, so we have to use get_starttag_text instead, put its result in a
    LIFO, and transform it in a closing tag when needed.

    Also, gotcha: the `tag` parameters are always in lowercase.

    :param filename: name of the parsed file, only used in error messages.
    :param blocklisted_tags: tags that are dropped, along with their content.
    :param required_blocklisted_tags: tags that are kept, but emptied.
    :raises ValueError: if the two sets of tags overlap.
    """
    def __init__(self, filename, blocklisted_tags, required_blocklisted_tags):
        super().__init__()
        self.filename = filename
        self.__textrepr = ''
        self.__meta = {}
        self.__validation_queue: List[str] = list()

        # We're using counters instead of booleans, to handle nested tags
        self.__in_dangerous_but_required_tag = 0
        self.__in_dangerous_tag = 0

        if required_blocklisted_tags & blocklisted_tags:  # pragma: nocover
            raise ValueError("There is an overlap between %s and %s" % (
                required_blocklisted_tags, blocklisted_tags))
        self.tag_required_blocklist = required_blocklisted_tags
        self.tag_blocklist = blocklisted_tags

    def error(self, message):  # pragma: no cover
        """ Amusingly, Python's documentation doesn't mention that this
        function needs to be implemented in subclasses of the parent class
        of parser.HTMLParser. This was found by fuzzing,
        triggering the following exception:
            NotImplementedError: subclasses of ParserBase must override error()
        """
        raise ValueError(message)

    def __check_all_tags_closed(self):
        """Raise a ValueError if some opening tags were never closed."""
        if self.__validation_queue:
            raise ValueError("Some tags (%s) were left unclosed in %s" % (
                ', '.join(self.__validation_queue),
                self.filename))

    def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]):
        """Record the opening tag, and copy it to the output unless it is
        blocklisted or nested inside a blocklisted/emptied tag."""
        # Ignore the type, because mypy is too stupid to infer
        # that get_starttag_text() can't return None.
        original_tag = self.get_starttag_text()  # type: ignore
        self.__validation_queue.append(original_tag)  # type: ignore

        if tag in self.tag_blocklist:
            self.__in_dangerous_tag += 1

        if self.__in_dangerous_tag == 0:
            if self.__in_dangerous_but_required_tag == 0:
                self.__textrepr += original_tag

        # Incremented after the copy above, so that the required tag
        # itself is kept while everything inside it is discarded.
        if tag in self.tag_required_blocklist:
            self.__in_dangerous_but_required_tag += 1

    def handle_endtag(self, tag: str):
        """Check that `tag` closes the most recently opened tag, and copy
        the closing tag to the output when appropriate.

        :raises ValueError: if there is no matching opening tag.
        """
        if not self.__validation_queue:
            raise ValueError("The closing tag %s doesn't have a corresponding "
                             "opening one in %s." % (tag, self.filename))

        previous_tag = self.__validation_queue.pop()
        previous_tag = previous_tag[1:-1]  # remove < and >
        # Remove the attributes. The tag name might be separated from them
        # by any kind of whitespace (newline, tab, …), not only by a space.
        name_and_attributes = previous_tag.split(None, 1)
        if name_and_attributes:
            previous_tag = name_and_attributes[0]
        if tag != previous_tag.lower():
            raise ValueError("The closing tag %s doesn't match the previous "
                             "tag %s in %s" %
                             (tag, previous_tag, self.filename))

        if tag in self.tag_required_blocklist:
            self.__in_dangerous_but_required_tag -= 1

        if self.__in_dangerous_tag == 0:
            if self.__in_dangerous_but_required_tag == 0:
                # There is no `get_endtag_text()` method :/
                self.__textrepr += '</' + previous_tag + '>'

        if tag in self.tag_blocklist:
            self.__in_dangerous_tag -= 1

    def handle_data(self, data: str):
        """Copy non-blank text to the output, escaped, unless it is inside
        a blocklisted or emptied tag."""
        if self.__in_dangerous_but_required_tag == 0:
            if self.__in_dangerous_tag == 0:
                if data.strip():
                    self.__textrepr += escape(data)

    def handle_startendtag(self, tag: str,
                           attrs: List[Tuple[str, Optional[str]]]):
        """Handle self-closing tags: blocklisted ones are recorded as
        metadata (and kept, without attributes, when they are required),
        the others are copied verbatim."""
        if tag in self.tag_required_blocklist | self.tag_blocklist:
            meta = dict(attrs)
            name = meta.get('name', 'harmful metadata')
            content = meta.get('content', 'harmful data')
            self.__meta[name] = content

            if self.__in_dangerous_tag == 0:
                if tag in self.tag_required_blocklist:
                    self.__textrepr += '<' + tag + ' />'
            return

        if self.__in_dangerous_tag == 0:
            if self.__in_dangerous_but_required_tag == 0:
                self.__textrepr += self.get_starttag_text()

    def remove_all(self, output_filename: str) -> bool:
        """Write the cleaned document to `output_filename`.

        :raises ValueError: if some tags were left unclosed.
        """
        self.__check_all_tags_closed()
        with open(output_filename, 'w', encoding='utf-8') as f:
            f.write(self.__textrepr)
        return True

    def get_meta(self) -> Dict[str, Any]:
        """Return the metadata found in the self-closing blocklisted tags.

        :raises ValueError: if some tags were left unclosed.
        """
        self.__check_all_tags_closed()
        return self.__meta
|
213
mat2
@@ -1,128 +1,231 @@
|
||||
#!/usr/bin/python3
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import os
|
||||
from typing import Tuple
|
||||
import shutil
|
||||
from typing import List, Set, Dict
|
||||
import sys
|
||||
import itertools
|
||||
import mimetypes
|
||||
import argparse
|
||||
import multiprocessing
|
||||
import logging
|
||||
import unicodedata
|
||||
import concurrent.futures
|
||||
|
||||
from libmat2 import parser_factory, unsupported_extensions
|
||||
try:
|
||||
from libmat2 import parser_factory, UNSUPPORTED_EXTENSIONS
|
||||
from libmat2 import check_dependencies, UnknownMemberPolicy
|
||||
except ValueError as ex:
|
||||
print(ex)
|
||||
sys.exit(1)
|
||||
|
||||
__version__ = '0.1.2'
|
||||
__version__ = '0.13.5'
|
||||
|
||||
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.WARNING)
|
||||
|
||||
def __print_without_chars(s: str):
    """ Print `s` without its control characters

    Every character whose Unicode general category starts with 'C' is
    dropped. We might use 'Cc' instead of 'C', but better safe than sorry
    https://www.unicode.org/reports/tr44/#GC_Values_Table
    """
    kept = [ch for ch in s if unicodedata.category(ch)[0] != 'C']
    print(''.join(kept))
|
||||
|
||||
def __check_file(filename: str, mode: int = os.R_OK) -> bool:
    """ Check that `filename` is an existing regular file, accessible
    with the given `mode`.

    :param filename: path of the file to check.
    :param mode: `os.access` mode, like `os.R_OK` or `os.R_OK | os.W_OK`.
    :return: True if every check passed; otherwise a message is printed
             and False is returned.
    """
    if not os.path.exists(filename):
        __print_without_chars("[-] %s doesn't exist." % filename)
        return False
    if not os.path.isfile(filename):
        __print_without_chars("[-] %s is not a regular file." % filename)
        return False
    if not os.access(filename, mode):
        # `append` and not `+=`: extending a list with a string adds it
        # one character at a time, which garbled the message.
        mode_str: List[str] = list()
        if mode & os.R_OK:
            mode_str.append('readable')
        if mode & os.W_OK:
            mode_str.append('writeable')
        __print_without_chars("[-] %s is not %s." % (filename, ' nor '.join(mode_str)))
        return False
    return True
|
||||
|
||||
|
||||
def create_arg_parser():
|
||||
def create_arg_parser() -> argparse.ArgumentParser:
|
||||
parser = argparse.ArgumentParser(description='Metadata anonymisation toolkit 2')
|
||||
parser.add_argument('files', nargs='*')
|
||||
parser.add_argument('-v', '--version', action='version',
|
||||
version='MAT2 %s' % __version__)
|
||||
parser.add_argument('-l', '--list', action='store_true',
|
||||
help='list all supported fileformats')
|
||||
|
||||
info = parser.add_mutually_exclusive_group()
|
||||
info.add_argument('-c', '--check', action='store_true',
|
||||
help='check if a file is free of harmful metadatas')
|
||||
info.add_argument('-s', '--show', action='store_true',
|
||||
help='list all the harmful metadata of a file without removing them')
|
||||
info.add_argument('-L', '--lightweight', action='store_true',
|
||||
help='remove SOME metadata')
|
||||
parser.add_argument('-V', '--verbose', action='store_true',
|
||||
help='show more verbose status information')
|
||||
parser.add_argument('--unknown-members', metavar='policy', default='abort',
|
||||
help='how to handle unknown members of archive-style '
|
||||
'files (policy should be one of: %s) [Default: abort]' %
|
||||
', '.join(p.value for p in UnknownMemberPolicy))
|
||||
parser.add_argument('--inplace', action='store_true',
|
||||
help='clean in place, without backup')
|
||||
parser.add_argument('--no-sandbox', dest='sandbox', action='store_false',
|
||||
default=True, help='Disable bubblewrap\'s sandboxing')
|
||||
|
||||
excl_group = parser.add_mutually_exclusive_group()
|
||||
excl_group.add_argument('files', nargs='*', help='the files to process',
|
||||
default=[])
|
||||
excl_group.add_argument('-v', '--version', action='version',
|
||||
version='mat2 %s' % __version__)
|
||||
excl_group.add_argument('-l', '--list', action='store_true', default=False,
|
||||
help='list all supported fileformats')
|
||||
excl_group.add_argument('--check-dependencies', action='store_true',
|
||||
default=False,
|
||||
help='check if mat2 has all the dependencies it '
|
||||
'needs')
|
||||
|
||||
excl_group = parser.add_mutually_exclusive_group()
|
||||
excl_group.add_argument('-L', '--lightweight', action='store_true',
|
||||
help='remove SOME metadata')
|
||||
excl_group.add_argument('-s', '--show', action='store_true',
|
||||
help='list harmful metadata detectable by mat2 '
|
||||
'without removing them')
|
||||
|
||||
return parser
|
||||
|
||||
|
||||
def show_meta(filename: str):
|
||||
def show_meta(filename: str, sandbox: bool):
|
||||
if not __check_file(filename):
|
||||
return
|
||||
|
||||
p, mtype = parser_factory.get_parser(filename) # type: ignore
|
||||
try:
|
||||
p, mtype = parser_factory.get_parser(filename) # type: ignore
|
||||
except ValueError as e:
|
||||
__print_without_chars("[-] something went wrong when processing %s: %s" % (filename, e))
|
||||
return
|
||||
if p is None:
|
||||
print("[-] %s's format (%s) is not supported" % (filename, mtype))
|
||||
__print_without_chars("[-] %s's format (%s) is not supported" % (filename, mtype))
|
||||
return
|
||||
p.sandbox = sandbox
|
||||
__print_meta(filename, p.get_meta())
|
||||
|
||||
|
||||
def __print_meta(filename: str, metadata: Dict, depth: int = 1):
|
||||
padding = " " * depth*2
|
||||
if not metadata:
|
||||
__print_without_chars(padding + "No metadata found in %s." % filename)
|
||||
return
|
||||
|
||||
print("[+] Metadata for %s:" % filename)
|
||||
for k, v in p.get_meta().items():
|
||||
__print_without_chars("[%s] Metadata for %s:" % ('+'*depth, filename))
|
||||
|
||||
for (k, v) in sorted(metadata.items()):
|
||||
if isinstance(v, dict):
|
||||
__print_meta(k, v, depth+1)
|
||||
continue
|
||||
|
||||
try: # FIXME this is ugly.
|
||||
print(" %s: %s" % (k, v))
|
||||
__print_without_chars(padding + " %s: %s" % (k, v))
|
||||
except UnicodeEncodeError:
|
||||
print(" %s: harmful content" % k)
|
||||
__print_without_chars(padding + " %s: harmful content" % k)
|
||||
except TypeError:
|
||||
pass # for things that aren't iterable
|
||||
|
||||
def clean_meta(params: Tuple[str, bool]) -> bool:
|
||||
filename, is_lightweigth = params
|
||||
if not __check_file(filename, os.R_OK|os.W_OK):
|
||||
|
||||
def clean_meta(filename: str, is_lightweight: bool, inplace: bool, sandbox: bool,
|
||||
policy: UnknownMemberPolicy) -> bool:
|
||||
mode = (os.R_OK | os.W_OK) if inplace else os.R_OK
|
||||
if not __check_file(filename, mode):
|
||||
return False
|
||||
|
||||
p, mtype = parser_factory.get_parser(filename) # type: ignore
|
||||
try:
|
||||
p, mtype = parser_factory.get_parser(filename) # type: ignore
|
||||
except ValueError as e:
|
||||
__print_without_chars("[-] something went wrong when cleaning %s: %s" % (filename, e))
|
||||
return False
|
||||
if p is None:
|
||||
print("[-] %s's format (%s) is not supported" % (filename, mtype))
|
||||
__print_without_chars("[-] %s's format (%s) is not supported" % (filename, mtype))
|
||||
return False
|
||||
if is_lightweigth:
|
||||
return p.remove_all_lightweight()
|
||||
return p.remove_all()
|
||||
p.unknown_member_policy = policy
|
||||
p.lightweight_cleaning = is_lightweight
|
||||
p.sandbox = sandbox
|
||||
|
||||
try:
|
||||
logging.debug('Cleaning %s…', filename)
|
||||
ret = p.remove_all()
|
||||
if ret is True:
|
||||
shutil.copymode(filename, p.output_filename)
|
||||
if inplace is True:
|
||||
os.rename(p.output_filename, filename)
|
||||
return ret
|
||||
except RuntimeError as e:
|
||||
__print_without_chars("[-] %s can't be cleaned: %s" % (filename, e))
|
||||
return False
|
||||
|
||||
|
||||
def show_parsers():
|
||||
print('[+] Supported formats:')
|
||||
formats = list()
|
||||
for parser in parser_factory._get_parsers():
|
||||
formats = set() # Set[str]
|
||||
for parser in parser_factory._get_parsers(): # type: ignore
|
||||
for mtype in parser.mimetypes:
|
||||
extensions = set()
|
||||
extensions = set() # Set[str]
|
||||
for extension in mimetypes.guess_all_extensions(mtype):
|
||||
if extension[1:] not in unsupported_extensions: # skip the dot
|
||||
if extension not in UNSUPPORTED_EXTENSIONS:
|
||||
extensions.add(extension)
|
||||
if not extensions:
|
||||
# we're not supporting a single extension in the current
|
||||
# mimetype, so there is not point in showing the mimetype at all
|
||||
continue
|
||||
formats.append(' - %s (%s)' % (mtype, ', '.join(extensions)))
|
||||
formats.add(' - %s (%s)' % (mtype, ', '.join(extensions)))
|
||||
print('\n'.join(sorted(formats)))
|
||||
|
||||
|
||||
def __get_files_recursively(files):
|
||||
def __get_files_recursively(files: List[str]) -> List[str]:
|
||||
ret: Set[str] = set()
|
||||
for f in files:
|
||||
if os.path.isdir(f):
|
||||
for path, _, _files in os.walk(f):
|
||||
for _f in _files:
|
||||
fname = os.path.join(path, _f)
|
||||
if __check_file(fname):
|
||||
yield fname
|
||||
ret.add(fname)
|
||||
elif __check_file(f):
|
||||
yield f
|
||||
ret.add(f)
|
||||
return list(ret)
|
||||
|
||||
def main():
|
||||
|
||||
def main() -> int:
|
||||
arg_parser = create_arg_parser()
|
||||
args = arg_parser.parse_args()
|
||||
|
||||
if args.verbose:
|
||||
logging.getLogger(__name__).setLevel(logging.DEBUG)
|
||||
|
||||
if not args.files:
|
||||
if not args.list:
|
||||
return arg_parser.print_help()
|
||||
show_parsers()
|
||||
if args.list:
|
||||
show_parsers()
|
||||
return 0
|
||||
elif args.check_dependencies:
|
||||
__print_without_chars("Dependencies for mat2 %s:" % __version__)
|
||||
for key, value in sorted(check_dependencies().items()):
|
||||
__print_without_chars('- %s: %s %s' % (key, 'yes' if value['found'] else 'no',
|
||||
'(optional)' if not value['required'] else ''))
|
||||
else:
|
||||
arg_parser.print_help()
|
||||
return 0
|
||||
|
||||
elif args.show:
|
||||
for f in __get_files_recursively(args.files):
|
||||
show_meta(f)
|
||||
show_meta(f, args.sandbox)
|
||||
return 0
|
||||
|
||||
else:
|
||||
p = multiprocessing.Pool()
|
||||
mode = (args.lightweight is True)
|
||||
l = zip(__get_files_recursively(args.files), itertools.repeat(mode))
|
||||
inplace = args.inplace
|
||||
policy = UnknownMemberPolicy(args.unknown_members)
|
||||
if policy == UnknownMemberPolicy.KEEP:
|
||||
logging.warning('Keeping unknown member files may leak metadata in the resulting file!')
|
||||
|
||||
no_failure = True
|
||||
files = __get_files_recursively(args.files)
|
||||
# We have to use Processes instead of Threads, since
|
||||
# we're using tempfile.mkdtemp, which isn't thread-safe.
|
||||
futures = list()
|
||||
with concurrent.futures.ProcessPoolExecutor() as executor:
|
||||
for f in files:
|
||||
future = executor.submit(clean_meta, f, args.lightweight,
|
||||
inplace, args.sandbox, policy)
|
||||
futures.append(future)
|
||||
for future in concurrent.futures.as_completed(futures):
|
||||
no_failure &= future.result()
|
||||
return 0 if no_failure is True else -1
|
||||
|
||||
ret = list(p.imap_unordered(clean_meta, list(l)))
|
||||
return 0 if all(ret) else -1
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
|
@@ -1,29 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import gi
|
||||
gi.require_version('Nautilus', '3.0')
|
||||
from gi.repository import Nautilus, GObject
|
||||
|
||||
class ColumnExtension(GObject.GObject, Nautilus.MenuProvider):
|
||||
def menu_activate_cb(self, menu, file):
|
||||
print "menu_activate_cb", file
|
||||
# TODO: clean metadata here
|
||||
|
||||
def get_background_items(self, window, file):
|
||||
""" https://bugzilla.gnome.org/show_bug.cgi?id=784278 """
|
||||
return None
|
||||
|
||||
def get_file_items(self, window, files):
|
||||
if len(files) != 1: # we're not supporting multiple files for now
|
||||
return
|
||||
|
||||
file = files[0]
|
||||
|
||||
item = Nautilus.MenuItem(
|
||||
name="MAT2::Remove_metadata",
|
||||
label="Remove metadata from %s" % file.get_name(),
|
||||
tip="Remove metadata from %s" % file.get_name()
|
||||
)
|
||||
item.connect('activate', self.menu_activate_cb, file)
|
||||
|
||||
return [item]
|
21
pyproject.toml
Normal file
@@ -0,0 +1,21 @@
|
||||
[project]
|
||||
name = "mat2"
|
||||
version = "0.13.5"
|
||||
description = "mat2 is a metadata removal tool, supporting a wide range of commonly used file formats, written in python3: at its core, it's a library, used by an eponymous command-line interface, as well as several file manager extensions."
|
||||
readme = "README.md"
|
||||
license = {file = "LICENSE"}
|
||||
requires-python = ">=3.9"
|
||||
dependencies = [
|
||||
'mutagen',
|
||||
'PyGObject',
|
||||
'pycairo',
|
||||
]
|
||||
[project.urls]
|
||||
Repository = "https://github.com/jvoisin/mat2"
|
||||
Issues = "https://github.com/jvoisin/mat2/issues"
|
||||
Changelog = "https://github.com/jvoisin/mat2/blob/master/CHANGELOG.md"
|
||||
|
||||
[tool.ruff]
|
||||
target-version = "py39"
|
||||
# E501 Line too long
|
||||
ignore = ["E501", "F401", "E402", "E722"]
|
13
setup.py
@@ -1,17 +1,17 @@
|
||||
import setuptools
|
||||
|
||||
with open("README.md", "r") as fh:
|
||||
with open("README.md", encoding='utf-8') as fh:
|
||||
long_description = fh.read()
|
||||
|
||||
setuptools.setup(
|
||||
name="mat2",
|
||||
version='0.1.2',
|
||||
version='0.13.5',
|
||||
author="Julien (jvoisin) Voisin",
|
||||
author_email="julien.voisin+mat2@dustri.org",
|
||||
description="A handy tool to trash your metadata",
|
||||
long_description=long_description,
|
||||
long_description_content_type="text/markdown",
|
||||
url="https://0xacab.org/jvoisin/mat2",
|
||||
url="https://github.com/jvoisin/mat2",
|
||||
python_requires = '>=3.5.0',
|
||||
scripts=['mat2'],
|
||||
install_requires=[
|
||||
@@ -20,7 +20,8 @@ setuptools.setup(
|
||||
'pycairo',
|
||||
],
|
||||
packages=setuptools.find_packages(exclude=('tests', )),
|
||||
classifiers=(
|
||||
data_files = [('share/man/man1', ['doc/mat2.1'])],
|
||||
classifiers=[
|
||||
"Development Status :: 3 - Alpha",
|
||||
"Environment :: Console",
|
||||
"License :: OSI Approved :: GNU Lesser General Public License v3 or later (LGPLv3+)",
|
||||
@@ -28,8 +29,8 @@ setuptools.setup(
|
||||
"Programming Language :: Python :: 3 :: Only",
|
||||
"Topic :: Security",
|
||||
"Intended Audience :: End Users/Desktop",
|
||||
),
|
||||
],
|
||||
project_urls={
|
||||
'bugtacker': 'https://0xacab.org/jvoisin/mat2/issues',
|
||||
'bugtacker': 'https://github.com/jvoisin/mat2/issues',
|
||||
},
|
||||
)
|
||||
|
BIN
tests/data/broken_xml_content_types.docx
Normal file
BIN
tests/data/comment.docx
Normal file
BIN
tests/data/control_chars.jpg
Normal file
After Width: | Height: | Size: 1.9 KiB |
BIN
tests/data/dirty.aiff
Normal file
BIN
tests/data/dirty.avi
Normal file
14
tests/data/dirty.css
Normal file
@@ -0,0 +1,14 @@
|
||||
/**
|
||||
* This is my super css framework
|
||||
* version: 1.0
|
||||
* author : jvoisin
|
||||
*/
|
||||
|
||||
body {
|
||||
color: red;
|
||||
background-color: blue;
|
||||
}
|
||||
|
||||
.underline {
|
||||
text-decoration: underline; /* underline is cool */
|
||||
}
|
BIN
tests/data/dirty.epub
Normal file
BIN
tests/data/dirty.gif
Normal file
After Width: | Height: | Size: 1.1 KiB |
BIN
tests/data/dirty.heic
Normal file
14
tests/data/dirty.html
Normal file
@@ -0,0 +1,14 @@
|
||||
<html>
|
||||
<head>
|
||||
<meta content="vim" name="generator"/>
|
||||
<meta content="jvoisin" name="author"/>
|
||||
</head>
|
||||
<body>
|
||||
<p>
|
||||
<h1>Hello</h1>
|
||||
I am a web page.
|
||||
Please <b>love</b> me.
|
||||
Here, have a pretty picture: <img src='dirty.jpg' alt='a pretty picture'/>
|
||||
</p>
|
||||
</body>
|
||||
</html>
|
BIN
tests/data/dirty.mp4
Normal file
8
tests/data/dirty.ppm
Normal file
@@ -0,0 +1,8 @@
|
||||
P3
|
||||
# A metadata
|
||||
3 2 1
|
||||
1 0 1 0 1 0 0 0 1
|
||||
# And an other one
|
||||
1 1 0 1 0 1 1 0 0
|
||||
# and a final one here
|
||||
|
636
tests/data/dirty.svg
Normal file
@@ -0,0 +1,636 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||
|
||||
<svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
inkscape:export-ydpi="384"
|
||||
inkscape:export-xdpi="384"
|
||||
inkscape:export-filename="mat2.png"
|
||||
width="128"
|
||||
height="128"
|
||||
id="svg11300"
|
||||
sodipodi:version="0.32"
|
||||
inkscape:version="0.92.3 (2405546, 2018-03-11)"
|
||||
sodipodi:docname="dirty.svg"
|
||||
inkscape:output_extension="org.inkscape.output.svg.inkscape"
|
||||
version="1.0"
|
||||
style="display:inline;enable-background:new"
|
||||
viewBox="0 0 128 128">
|
||||
<script
|
||||
id="script4600" />
|
||||
<title
|
||||
id="title4162">Adwaita Icon Template</title>
|
||||
<defs
|
||||
id="defs3" />
|
||||
<sodipodi:namedview
|
||||
stroke="#ef2929"
|
||||
fill="#f57900"
|
||||
id="base"
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="0.25490196"
|
||||
inkscape:pageopacity="0.0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:zoom="4.0446508"
|
||||
inkscape:cx="61.536232"
|
||||
inkscape:cy="41.548134"
|
||||
inkscape:current-layer="layer1"
|
||||
showgrid="true"
|
||||
inkscape:grid-bbox="true"
|
||||
inkscape:document-units="px"
|
||||
inkscape:showpageshadow="false"
|
||||
inkscape:window-width="1366"
|
||||
inkscape:window-height="747"
|
||||
inkscape:window-x="0"
|
||||
inkscape:window-y="21"
|
||||
width="400px"
|
||||
height="300px"
|
||||
inkscape:snap-nodes="true"
|
||||
inkscape:snap-bbox="false"
|
||||
objecttolerance="7"
|
||||
gridtolerance="12"
|
||||
guidetolerance="13"
|
||||
inkscape:window-maximized="1"
|
||||
inkscape:pagecheckerboard="false"
|
||||
showguides="true"
|
||||
inkscape:guide-bbox="true"
|
||||
inkscape:locked="false"
|
||||
inkscape:measure-start="0,0"
|
||||
inkscape:measure-end="0,0"
|
||||
inkscape:object-nodes="true"
|
||||
inkscape:bbox-nodes="true"
|
||||
inkscape:snap-global="true"
|
||||
inkscape:object-paths="true"
|
||||
inkscape:snap-intersection-paths="true"
|
||||
inkscape:snap-bbox-edge-midpoints="true"
|
||||
inkscape:snap-bbox-midpoints="true"
|
||||
showborder="false"
|
||||
inkscape:snap-center="true"
|
||||
inkscape:snap-object-midpoints="true"
|
||||
inkscape:snap-midpoints="true"
|
||||
inkscape:snap-smooth-nodes="true">
|
||||
<inkscape:grid
|
||||
type="xygrid"
|
||||
id="grid5883"
|
||||
spacingx="2"
|
||||
spacingy="2"
|
||||
enabled="true"
|
||||
visible="true"
|
||||
empspacing="4"
|
||||
originx="0"
|
||||
originy="0" />
|
||||
<sodipodi:guide
|
||||
position="64,8"
|
||||
orientation="0,1"
|
||||
id="guide1073"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="12,64"
|
||||
orientation="1,0"
|
||||
id="guide1075"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="64,104"
|
||||
orientation="0,1"
|
||||
id="guide1099"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="64,128"
|
||||
orientation="0,1"
|
||||
id="guide993"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="104,64"
|
||||
orientation="1,0"
|
||||
id="guide995"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="9.2651362e-08,64"
|
||||
orientation="1,0"
|
||||
id="guide867"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="120,64"
|
||||
orientation="1,0"
|
||||
id="guide869"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="64,116"
|
||||
orientation="0,1"
|
||||
id="guide871"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<inkscape:grid
|
||||
type="xygrid"
|
||||
id="grid873"
|
||||
spacingx="1"
|
||||
spacingy="1"
|
||||
empspacing="8"
|
||||
color="#000000"
|
||||
opacity="0.49019608"
|
||||
empcolor="#000000"
|
||||
empopacity="0.08627451"
|
||||
dotted="true" />
|
||||
<sodipodi:guide
|
||||
position="24,64"
|
||||
orientation="1,0"
|
||||
id="guide877"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="116,64"
|
||||
orientation="1,0"
|
||||
id="guide879"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="64,120"
|
||||
orientation="0,1"
|
||||
id="guide881"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="64,12"
|
||||
orientation="0,1"
|
||||
id="guide883"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="8,64"
|
||||
orientation="1,0"
|
||||
id="guide885"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="128,64"
|
||||
orientation="1,0"
|
||||
id="guide887"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="64,0"
|
||||
orientation="0,1"
|
||||
id="guide897"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="64,24"
|
||||
orientation="0,1"
|
||||
id="guide899"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="256,256"
|
||||
orientation="-0.70710678,0.70710678"
|
||||
id="guide950"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="64,64"
|
||||
orientation="0.70710678,0.70710678"
|
||||
id="guide952"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
</sodipodi:namedview>
|
||||
<metadata
|
||||
id="metadata4">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:creator>
|
||||
<cc:Agent>
|
||||
<dc:title>GNOME Design Team</dc:title>
|
||||
</cc:Agent>
|
||||
</dc:creator>
|
||||
<dc:source>mat2's source code</dc:source>
|
||||
<cc:license
|
||||
rdf:resource="http://creativecommons.org/licenses/by-sa/4.0/" />
|
||||
<dc:title>Adwaita Icon Template</dc:title>
|
||||
<dc:subject>
|
||||
<rdf:Bag>
|
||||
<rdf:li>mat2</rdf:li>
|
||||
<rdf:li>logo</rdf:li>
|
||||
<rdf:li>metadata</rdf:li>
|
||||
</rdf:Bag>
|
||||
</dc:subject>
|
||||
<dc:date>2019 07 13</dc:date>
|
||||
<dc:rights>
|
||||
<cc:Agent>
|
||||
<dc:title>LGPL</dc:title>
|
||||
</cc:Agent>
|
||||
</dc:rights>
|
||||
<dc:publisher>
|
||||
<cc:Agent>
|
||||
<dc:title>jvoisin</dc:title>
|
||||
</cc:Agent>
|
||||
</dc:publisher>
|
||||
<dc:identifier>mat2-testdata-svg</dc:identifier>
|
||||
<dc:relation />
|
||||
<dc:language>English</dc:language>
|
||||
<dc:coverage />
|
||||
<dc:description>This is a test svg image for mat2's testsuite</dc:description>
|
||||
<dc:contributor>
|
||||
<cc:Agent>
|
||||
<dc:title>jvoisin, and Rose for the design</dc:title>
|
||||
</cc:Agent>
|
||||
</dc:contributor>
|
||||
</cc:Work>
|
||||
<cc:License
|
||||
rdf:about="http://creativecommons.org/licenses/by-sa/4.0/">
|
||||
<cc:permits
|
||||
rdf:resource="http://creativecommons.org/ns#Reproduction" />
|
||||
<cc:permits
|
||||
rdf:resource="http://creativecommons.org/ns#Distribution" />
|
||||
<cc:requires
|
||||
rdf:resource="http://creativecommons.org/ns#Notice" />
|
||||
<cc:requires
|
||||
rdf:resource="http://creativecommons.org/ns#Attribution" />
|
||||
<cc:permits
|
||||
rdf:resource="http://creativecommons.org/ns#DerivativeWorks" />
|
||||
<cc:requires
|
||||
rdf:resource="http://creativecommons.org/ns#ShareAlike" />
|
||||
</cc:License>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<g
|
||||
id="layer1"
|
||||
inkscape:label="Icon"
|
||||
inkscape:groupmode="layer"
|
||||
style="display:inline"
|
||||
transform="translate(0,-172)">
|
||||
<g
|
||||
inkscape:groupmode="layer"
|
||||
id="layer2"
|
||||
inkscape:label="baseplate"
|
||||
style="display:none">
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.33333349px;line-height:125%;font-family:Cantarell;-inkscape-font-specification:'Cantarell, Normal';text-align:start;writing-mode:lr-tb;text-anchor:start;display:inline;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.33264872;enable-background:new"
|
||||
x="7.9499588"
|
||||
y="148.65199"
|
||||
id="context"
|
||||
inkscape:label="context"><tspan
|
||||
sodipodi:role="line"
|
||||
id="tspan2716"
|
||||
x="7.9499588"
|
||||
y="148.65199"
|
||||
style="font-size:5.33333349px;stroke-width:0.33264872">apps</tspan></text>
|
||||
<text
|
||||
inkscape:label="icon-name"
|
||||
id="text3021"
|
||||
y="157.23398"
|
||||
x="7.7533054"
|
||||
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.33333349px;line-height:125%;font-family:Cantarell;-inkscape-font-specification:'Cantarell, Bold';text-align:start;writing-mode:lr-tb;text-anchor:start;display:inline;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.33264872;enable-background:new"
|
||||
xml:space="preserve"><tspan
|
||||
y="157.23398"
|
||||
x="7.7533054"
|
||||
id="tspan3023"
|
||||
sodipodi:role="line"
|
||||
style="font-size:5.33333349px;stroke-width:0.33264872">org.gnome.</tspan></text>
|
||||
<g
|
||||
style="display:inline;fill:#000000;enable-background:new"
|
||||
transform="matrix(7.9911709,0,0,8.0036407,-167.7909,-4846.0776)"
|
||||
id="g12027"
|
||||
inkscape:export-xdpi="12"
|
||||
inkscape:export-ydpi="12" />
|
||||
<rect
|
||||
style="display:inline;overflow:visible;visibility:visible;fill:#f0f0f0;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.5;marker:none;enable-background:accumulate"
|
||||
id="rect13805"
|
||||
width="128"
|
||||
height="128"
|
||||
x="9.2651362e-08"
|
||||
y="172"
|
||||
inkscape:label="512x512" />
|
||||
<g
|
||||
id="g883"
|
||||
style="fill:none;fill-opacity:0.25098039;stroke:#a579b3;stroke-opacity:1"
|
||||
transform="translate(-24,24)" />
|
||||
<g
|
||||
id="g900"
|
||||
style="fill:none;fill-opacity:0.25098039;stroke:#a579b3;stroke-opacity:1"
|
||||
transform="translate(-24,24)" />
|
||||
<g
|
||||
id="g1168"
|
||||
transform="matrix(0.25,0,0,0.25,6.9488522e-8,225)">
|
||||
<circle
|
||||
cx="256"
|
||||
cy="44"
|
||||
r="240"
|
||||
id="path1142"
|
||||
style="opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal" />
|
||||
<rect
|
||||
ry="32"
|
||||
rx="32"
|
||||
y="-180"
|
||||
x="96"
|
||||
height="448"
|
||||
width="319.99979"
|
||||
id="rect1110"
|
||||
style="opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal" />
|
||||
<rect
|
||||
ry="32"
|
||||
rx="32"
|
||||
y="-164"
|
||||
x="48"
|
||||
height="416"
|
||||
width="416"
|
||||
id="rect1110-8"
|
||||
style="display:inline;opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal;enable-background:new" />
|
||||
<rect
|
||||
ry="32"
|
||||
rx="32"
|
||||
y="-116"
|
||||
x="32"
|
||||
height="320"
|
||||
width="448"
|
||||
id="rect1110-8-9"
|
||||
style="display:inline;opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal;enable-background:new" />
|
||||
</g>
|
||||
</g>
|
||||
<g
|
||||
inkscape:groupmode="layer"
|
||||
id="layer9"
|
||||
inkscape:label="hires"
|
||||
style="display:none" />
|
||||
<g
|
||||
id="g944"
|
||||
transform="matrix(1,0,0,0.93868822,0,14.545966)">
|
||||
<path
|
||||
style="fill:#99c1f1;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.41013032;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="m 50.53899,195.25817 6.396029,-11.43484 1.082405,-0.87215 4.821622,-10.46578 0.885604,-0.38763 2.558412,4.74837 2.755213,9.59364 1.672808,1.35667 3.542417,-0.87215 5.707227,12.59771 12.988859,9.59364 3.050415,3.87621 v 2.71335 l -16.334476,-1.25977 -7.084833,1.45359 -4.428021,-0.38763 -7.084833,0.29072 -11.414452,-0.58143 -3.640817,0.96905 -9.052843,-1.64739 -2.066409,0.0969 -1.476008,-0.48452 1.377607,-1.45358 1.869609,-1.06596 6.002428,-11.04722 1.279206,0.48453 5.412025,-6.49267 z"
|
||||
id="path3455"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#241f31;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 49.358184,215.31759 -3.444016,0.9206 -9.003641,-1.74429 -1.918809,0.24226 -1.623608,-0.58143 1.574407,-1.50204 1.722008,-0.96905 5.953228,-11.09567 1.279205,0.53298 5.510426,-6.54112 0.344401,0.29072 -4.969223,10.27197 2.214011,1.93811 -0.246001,4.45765 z"
|
||||
id="path3459"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#241f31;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 50.438601,195.22912 6.470906,-11.5803 1.113274,-0.6167 4.870575,-10.62099 0.904535,-0.41113 -0.417479,3.3576 0.626218,0.89079 0.834954,15.89722 1.391594,3.70021 -3.687722,5.34476 0.208739,1.37044 -0.347898,5.68737 1.87865,3.28908 7.375442,2.19272 1.252433,2.19272 -0.487057,0.13704 -4.244358,-0.54818 -6.540486,0.41114 -2.435287,-2.19272 -0.626216,-4.24839 -2.087389,-6.16703 -4.035619,-3.42612 -2.087388,-4.38544"
|
||||
id="path3461"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 32.069579,218.11563 c -0.06958,-0.27409 0.695796,-1.23341 0.695796,-1.23341 l 2.783185,-0.0685 1.739491,2.26124 4.661836,5.13919 0.139158,1.57602 -4.174778,5.96145 -0.487057,6.16703 -2.922344,2.26124 -0.06958,1.57601 h -1.113274 l -1.322013,-3.08351 2.017809,-14.86938 z"
|
||||
id="path3400"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 48.83827,222.43255 1.600331,-3.01499 -0.695796,-0.75375 -5.635951,-1.16488 -3.200663,0.82227 -0.06958,1.50749 1.53075,0.75375 1.461174,2.67237 -0.208739,1.71307 1.739489,1.02783 2.296129,-0.54818 z"
|
||||
id="path3402"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 51.203977,217.70449 1.113274,-0.68522 2.365707,1.02784 1.322013,2.67237 -2.226548,2.26125 -1.322013,-0.82227 -1.322013,-0.61671 0.834956,-1.71306 z"
|
||||
id="path3404"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 43.758957,226.61242 1.948228,0.68522 0.417479,1.91863 -0.626216,1.30193 -1.182854,0.34261 -1.113275,1.02784 -0.765376,3.63169 0.626218,3.01499 -1.252435,0.68522 -0.487057,-0.41113 -0.278319,-1.5075 -1.80907,-1.37045 -0.765376,-3.49464 3.618141,-3.42613 1.669912,-2.67237"
|
||||
id="path3406"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 50.57776,223.25481 0.13916,0.68523 -2.783187,3.83726 0.06958,1.64454 -0.626218,1.50749 -1.60033,1.43897 -0.06958,0.75375 1.600333,1.91863 1.182854,3.08351 0.974114,0.68523 1.669911,-2.80942 -0.278318,-3.22056 3.966039,-3.3576 0.695796,-1.09636 -3.270243,-4.45396 z"
|
||||
id="path3408"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 51.839954,236.39856 -0.834826,1.58948 0.166966,1.26061 1.057445,1.97315 0.500896,-0.32886 0.389584,-1.7539 1.447031,-1.151 2.337512,-4.0559 -0.22262,-1.04138 -1.947927,-1.69909 -2.114892,1.31542 0.278276,3.39819 z"
|
||||
id="path3410"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 57.593778,229.84236 -1.043694,1.09636 0.765375,0.89079 1.043695,-0.20556 v -1.43898 z"
|
||||
id="path3412"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 59.344793,218.25267 -0.765376,2.19272 -0.695796,0.27409 -0.695796,1.91863 -2.226548,2.26124 2.574446,3.56317 h 1.182854 l 0.487057,0.75375 0.626217,1.09636 1.948229,1.30193 2.922346,-0.6167 1.53075,-2.26125 -1.043694,-3.3576 -1.043693,-1.64454 1.322011,-2.60385 -0.904535,-1.37045 -2.226548,0.0685 z"
|
||||
id="path3416"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 72.150522,238.17554 -0.518261,1.78635 1.036524,2.16915 1.684349,-2.04155 -0.647826,-2.16915 z"
|
||||
id="path3418"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 66.789813,223.66595 1.600333,-0.75375 1.739489,-4.11135 2.922346,0.75375 1.322013,0.41114 0.139159,6.7152 -1.461172,1.02784 -2.226548,4.17987 -0.834956,-0.41114 -0.626216,0.95932 -2.574448,-0.61671 0.904537,-3.08351 z"
|
||||
id="path3422"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 77.505077,218.59529 1.182854,-0.20557 2.435287,1.30193 -0.974115,1.02783 -2.087389,3.63169 -1.391593,0.0685 -1.113274,-0.61671 1.043695,-2.19271 z"
|
||||
id="path3426"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 73.539038,231.06638 1.043695,-1.30193 1.043694,-2.80942 4.522676,1.71306 -0.974115,2.87795 -1.94823,-0.41114 -1.80907,1.09636 z"
|
||||
id="path3428"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 78.200873,225.6531 7.932079,-7.94861 3.339822,1.09636 0.974115,0.13705 1.600331,-1.02784 3.339822,0.0685 -5.079314,12.81371 -3.200663,-1.98715 0.139161,-1.16489 -0.695798,-0.6167 -0.208737,-1.16488 -1.043696,0.27409 -3.200663,2.39829 z"
|
||||
id="path3430"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 81.401536,230.99786 c 0,-0.2741 2.156968,-1.98716 2.156968,-1.98716 l 2.017811,1.30193 -0.904535,2.32976 -1.182855,0.75375 z"
|
||||
id="path3432"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 81.679855,237.8501 0.765375,-1.91863 0.208739,-1.2334 2.156969,0.20557 2.156968,-2.87795 3.409403,1.02784 -0.904535,2.80942 -0.904535,0.34261 -0.626218,2.80943 1.043694,4.72805 -0.904535,1.09636 -1.80907,-2.19272 -0.626217,-1.37045 z"
|
||||
id="path3434"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 78.131294,238.60385 0.626216,3.08351 -0.626216,3.22056 0.765375,0.95931 -0.626216,5.68737 2.504866,2.32976 1.87865,-0.47965 0.417478,-3.35761 1.669911,-0.0685 3.757301,-1.8501 -0.20874,-1.98716 -2.226548,-0.20556 -1.182854,-3.01499 -3.200662,-2.05568 -1.252434,-2.39828 z"
|
||||
id="path3436"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 84.532619,251.41755 -0.278318,1.43898 -0.695797,0.6167 1.322013,2.67238 2.365709,-0.20557 1.53075,-2.94647 -2.365707,-1.98715 z"
|
||||
id="path3438"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 64.993183,249.51154 -1.14959,2.51583 0.766392,1.69818 2.618509,0.25159 0.702526,1.19502 1.021857,2.39003 -0.574794,2.32714 3.89583,1.88688 0.95799,-1.06923 0.510928,-4.59139 -4.023561,-2.70451 -0.127732,-4.21402 z"
|
||||
id="path3440"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 72.912822,251.00642 h 1.391592 l 2.574446,0.75375 1.391593,1.98715 1.461172,1.30193 -0.139159,3.42612 -3.409402,1.57602 -0.974115,-1.85011 0.626217,-3.3576 -3.270243,-1.85011 z"
|
||||
id="path3442"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 72.147446,264.77944 1.80907,-1.98715 3.339822,-1.85011 1.322013,-0.0685 4.661835,-3.63169 1.391594,0.34261 0.556637,4.52248 -3.200664,4.04283 -2.852765,-0.82227 -1.80907,0.54818 -0.765376,1.43897 -2.087389,0.68522 z"
|
||||
id="path3444"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 75.06979,272.93361 0.765376,-1.30192 1.252433,-0.41114 0.904535,-2.87794 1.94823,-0.61671 0.556637,2.60386 -3.339822,6.0985 -1.391593,-0.0685 z"
|
||||
id="path3446"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 71.451649,268.20556 -1.252433,1.85011 2.504867,1.98715 0.765376,0.82227 1.73949,-2.39829 -2.296127,-2.80942 -1.461173,0.27409 z"
|
||||
id="path3448"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 62.24531,254.0948 1.461172,1.02784 1.948229,0.54818 0.487058,1.64454 -1.461173,2.67237 -0.06958,1.78159 -1.669911,1.85011 -1.252433,-2.05568 0.487057,-2.80942 -1.391593,-0.34261 -0.904535,-2.80942 z"
|
||||
id="path3450"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 47.585836,246.55246 -0.695796,3.70021 -0.139159,1.37045 1.87865,0.68523 1.391592,0.95931 1.809071,-1.64454 -0.417478,-0.95931 z"
|
||||
id="path3452"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 54.682958,247.78586 -1.043694,1.02784 0.208739,1.98715 1.600331,0.89079 0.626217,-0.47965 0.06958,-2.26125 z"
|
||||
id="path3454"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 48.629531,258.95503 4.800994,-6.16703 3.409402,0.82227 0.556637,1.78159 3.131083,4.79657 -1.669911,5.82441 -3.200663,-1.37045 -0.417478,-3.49464 -2.087388,1.30192 z"
|
||||
id="path3456"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 45.915924,252.71948 -0.487056,1.98715 1.60033,1.57602 1.461174,-0.20557 -0.347899,-2.19272 z"
|
||||
id="path3458"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 67.555189,261.6274 -1.80907,2.80943 -2.435287,8.42826 2.783185,3.76874 1.461172,-0.0685 1.113274,-2.12419 1.043696,-0.20557 0.487057,-1.09636 -1.043694,-4.45396 1.182853,-4.31692 z"
|
||||
id="path3460"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 58.718577,267.79443 1.600331,-1.23341 2.017809,1.71306 -0.904535,1.85011 z"
|
||||
id="path3462"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 58.509838,276.49678 2.156968,-4.591 1.391593,-0.27409 0.834955,1.50749 -2.017809,5.13919 z"
|
||||
id="path3464"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 71.242911,274.02997 1.391592,0.20557 1.043694,3.01499 2.01781,0.68522 1.530751,1.57602 -0.904535,2.87795 -2.365707,2.32976 -0.139159,3.56317 -1.322013,1.98715 -2.504867,-1.85011 -0.278318,-2.67237 -1.530752,-1.78159 -1.113274,-3.08351 3.61814,-4.17987 z"
|
||||
id="path3466"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 62.893354,276.5653 3.270244,1.16489 0.06958,3.70021 -0.556637,0.68523 0.974115,3.70021 1.252433,1.64454 0.06958,3.08351 -2.017809,1.37045 -2.574447,8.08566 -2.574447,-1.30193 -1.948229,-9.79872 z"
|
||||
id="path3468"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 58.440258,283.5546 h 0.556637 l 0.417478,0.95931 -0.208739,1.30193 -1.461172,0.13704 z"
|
||||
id="path3472"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 56.700767,279.16916 -1.113274,0.95931 0.834956,2.80943 1.600331,0.20556 0.487058,-2.05567 -0.695796,-1.91863 z"
|
||||
id="path3474"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 53.152207,272.17987 0.139159,5.13918 1.87865,1.23341 0.834955,-0.54818 0.904535,-3.63169 1.530752,-1.57602 -1.669911,-3.97431 -3.548561,3.08352 z"
|
||||
id="path3476"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 45.915924,258.33832 -0.208739,3.83726 -4.731414,3.97431 1.948229,2.80942 8.488716,0.82227 0.417478,1.98715 1.043694,-0.75375 0.487057,-2.19272 1.182854,-1.64454 -0.417478,-1.09635 -1.87865,-2.60386 -3.757299,-1.37045 -1.461174,-3.22056 z"
|
||||
id="path3480"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 40.279975,263.68308 1.669912,0.6167 3.061502,-6.37259 -0.904535,-5.61884 -2.504867,-0.34262 -1.391592,-1.2334 2.156968,-7.606 -2.087388,-4.45396 -3.409402,1.57602 -0.834956,3.42612 -1.87865,0.20557 -0.347898,2.1242 1.530752,1.64454 h 1.322013 l 0.626217,3.90578 2.296127,5.61884 -0.347898,2.19272 z"
|
||||
id="path3482"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 66.531337,247.61066 -0.590018,-0.31657 -0.420783,-1.71262 0.427793,-0.66945 1.306823,-1.13114 2.316342,-1.38746 1.06612,0.23465 -0.01701,2.21105 -2.36166,3.35302 z"
|
||||
id="path4284"
|
||||
inkscape:connector-curvature="0"
|
||||
inkscape:transform-center-x="4.9927099"
|
||||
inkscape:transform-center-y="-9.3161687" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 72.373733,232.22199 -0.815102,1.03206 4.017286,4.12827 1.571981,0.17201 1.339096,-0.86006 0.931544,0.63071 2.387083,-2.98152 -2.794634,-0.91739 -3.027519,0.22934 z"
|
||||
id="path3601"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 57.407878,237.1102 -1.301737,2.34289 -1.301738,0.61888 -0.17955,1.45878 -4.488748,1.54719 -0.403989,1.50299 0.314213,0.30944 1.032412,0.0884 v 1.41457 l 1.660839,1.50299 2.154598,-1.94504 1.571064,0.35364 2.738136,-1.94504 -1.436399,-2.56392 0.987525,-3.44803 -0.583538,-1.37037 z"
|
||||
id="path3603"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 62.104217,246.96003 5.843936,-6.55723 0.659867,-2.66044 2.221783,-0.40757 -0.386451,-3.39556 -2.000988,-0.60704 -6.246127,-0.36572 -2.624948,2.5137 1.519708,2.75102 -0.347742,5.51876 z"
|
||||
id="path3605"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 71.024647,249.63275 5.822153,1.31875 1.047988,-3.89891 -1.280874,-1.43343 0.523995,-6.02038 -3.551515,5.275 0.34933,2.06413 -2.037753,0.80272 -1.164431,0.45869 z"
|
||||
id="path3607"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 59.099222,247.24427 -2.095974,1.72011 -0.05822,1.60543 0.465772,1.72011 1.455539,0.97473 -0.407551,0.97473 2.328861,-0.34402 2.27064,-2.86685 -1.571981,-0.57337 -0.640437,-2.86685 -1.51376,-0.40136 z"
|
||||
id="path3609"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 44.078067,234.34346 0.291107,4.47228 -1.863089,1.43342 2.095976,3.72691 2.037753,0.0573 2.27064,-3.55489 -2.969297,-4.98831 z"
|
||||
id="path3611"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 44.660282,245.46683 -3.318627,4.30027 1.339096,1.26141 2.561747,-0.28668 1.222652,-3.15354 z"
|
||||
id="path3613"
|
||||
inkscape:connector-curvature="0" />
|
||||
</g>
|
||||
</g>
|
||||
</svg>
|
After Width: | Height: | Size: 34 KiB |
1
tests/data/dirty.txt
Normal file
@@ -0,0 +1 @@
|
||||
I'm a file that can't have metadata, but I'm supposed to be supported anyway.
|
BIN
tests/data/dirty.wav
Normal file
BIN
tests/data/dirty.webp
Normal file
After Width: | Height: | Size: 38 KiB |
BIN
tests/data/dirty.wmv
Normal file
BIN
tests/data/dirty_with_nsid.docx
Normal file
BIN
tests/data/embedded_corrupted.docx
Normal file
BIN
tests/data/embedded_corrupted.odt
Normal file
BIN
tests/data/malformed_content_types.docx
Normal file
BIN
tests/data/narrated_powerpoint_presentation.pptx
Normal file
BIN
tests/data/no_content_types.docx
Normal file
BIN
tests/data/office_revision_session_ids.docx
Normal file
BIN
tests/data/revision.docx
Normal file
BIN
tests/data/revision.odt
Normal file
636
tests/data/weird.svg
Normal file
@@ -0,0 +1,636 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||
|
||||
<svg
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns="http://www.w3.org/1337/svg"
|
||||
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
|
||||
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
|
||||
inkscape:export-ydpi="384"
|
||||
inkscape:export-xdpi="384"
|
||||
inkscape:export-filename="mat2.png"
|
||||
width="128"
|
||||
height="128"
|
||||
id="svg11300"
|
||||
sodipodi:version="0.32"
|
||||
inkscape:version="0.92.3 (2405546, 2018-03-11)"
|
||||
sodipodi:docname="dirty.svg"
|
||||
inkscape:output_extension="org.inkscape.output.svg.inkscape"
|
||||
version="1.0"
|
||||
style="display:inline;enable-background:new"
|
||||
viewBox="0 0 128 128">
|
||||
<script
|
||||
id="script4600" />
|
||||
<title
|
||||
id="title4162">Adwaita Icon Template</title>
|
||||
<defs
|
||||
id="defs3" />
|
||||
<sodipodi:namedview
|
||||
stroke="#ef2929"
|
||||
fill="#f57900"
|
||||
id="base"
|
||||
pagecolor="#ffffff"
|
||||
bordercolor="#666666"
|
||||
borderopacity="0.25490196"
|
||||
inkscape:pageopacity="0.0"
|
||||
inkscape:pageshadow="2"
|
||||
inkscape:zoom="4.0446508"
|
||||
inkscape:cx="61.536232"
|
||||
inkscape:cy="41.548134"
|
||||
inkscape:current-layer="layer1"
|
||||
showgrid="true"
|
||||
inkscape:grid-bbox="true"
|
||||
inkscape:document-units="px"
|
||||
inkscape:showpageshadow="false"
|
||||
inkscape:window-width="1366"
|
||||
inkscape:window-height="747"
|
||||
inkscape:window-x="0"
|
||||
inkscape:window-y="21"
|
||||
width="400px"
|
||||
height="300px"
|
||||
inkscape:snap-nodes="true"
|
||||
inkscape:snap-bbox="false"
|
||||
objecttolerance="7"
|
||||
gridtolerance="12"
|
||||
guidetolerance="13"
|
||||
inkscape:window-maximized="1"
|
||||
inkscape:pagecheckerboard="false"
|
||||
showguides="true"
|
||||
inkscape:guide-bbox="true"
|
||||
inkscape:locked="false"
|
||||
inkscape:measure-start="0,0"
|
||||
inkscape:measure-end="0,0"
|
||||
inkscape:object-nodes="true"
|
||||
inkscape:bbox-nodes="true"
|
||||
inkscape:snap-global="true"
|
||||
inkscape:object-paths="true"
|
||||
inkscape:snap-intersection-paths="true"
|
||||
inkscape:snap-bbox-edge-midpoints="true"
|
||||
inkscape:snap-bbox-midpoints="true"
|
||||
showborder="false"
|
||||
inkscape:snap-center="true"
|
||||
inkscape:snap-object-midpoints="true"
|
||||
inkscape:snap-midpoints="true"
|
||||
inkscape:snap-smooth-nodes="true">
|
||||
<inkscape:grid
|
||||
type="xygrid"
|
||||
id="grid5883"
|
||||
spacingx="2"
|
||||
spacingy="2"
|
||||
enabled="true"
|
||||
visible="true"
|
||||
empspacing="4"
|
||||
originx="0"
|
||||
originy="0" />
|
||||
<sodipodi:guide
|
||||
position="64,8"
|
||||
orientation="0,1"
|
||||
id="guide1073"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="12,64"
|
||||
orientation="1,0"
|
||||
id="guide1075"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="64,104"
|
||||
orientation="0,1"
|
||||
id="guide1099"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="64,128"
|
||||
orientation="0,1"
|
||||
id="guide993"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="104,64"
|
||||
orientation="1,0"
|
||||
id="guide995"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="9.2651362e-08,64"
|
||||
orientation="1,0"
|
||||
id="guide867"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="120,64"
|
||||
orientation="1,0"
|
||||
id="guide869"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="64,116"
|
||||
orientation="0,1"
|
||||
id="guide871"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<inkscape:grid
|
||||
type="xygrid"
|
||||
id="grid873"
|
||||
spacingx="1"
|
||||
spacingy="1"
|
||||
empspacing="8"
|
||||
color="#000000"
|
||||
opacity="0.49019608"
|
||||
empcolor="#000000"
|
||||
empopacity="0.08627451"
|
||||
dotted="true" />
|
||||
<sodipodi:guide
|
||||
position="24,64"
|
||||
orientation="1,0"
|
||||
id="guide877"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="116,64"
|
||||
orientation="1,0"
|
||||
id="guide879"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="64,120"
|
||||
orientation="0,1"
|
||||
id="guide881"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="64,12"
|
||||
orientation="0,1"
|
||||
id="guide883"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="8,64"
|
||||
orientation="1,0"
|
||||
id="guide885"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="128,64"
|
||||
orientation="1,0"
|
||||
id="guide887"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="64,0"
|
||||
orientation="0,1"
|
||||
id="guide897"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="64,24"
|
||||
orientation="0,1"
|
||||
id="guide899"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="256,256"
|
||||
orientation="-0.70710678,0.70710678"
|
||||
id="guide950"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
<sodipodi:guide
|
||||
position="64,64"
|
||||
orientation="0.70710678,0.70710678"
|
||||
id="guide952"
|
||||
inkscape:locked="false"
|
||||
inkscape:label=""
|
||||
inkscape:color="rgb(0,0,255)" />
|
||||
</sodipodi:namedview>
|
||||
<metadata
|
||||
id="metadata4">
|
||||
<rdf:RDF>
|
||||
<cc:Work
|
||||
rdf:about="">
|
||||
<dc:format>image/svg+xml</dc:format>
|
||||
<dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" />
|
||||
<dc:creator>
|
||||
<cc:Agent>
|
||||
<dc:title>GNOME Design Team</dc:title>
|
||||
</cc:Agent>
|
||||
</dc:creator>
|
||||
<dc:source>mat2's source code</dc:source>
|
||||
<cc:license
|
||||
rdf:resource="http://creativecommons.org/licenses/by-sa/4.0/" />
|
||||
<dc:title>Adwaita Icon Template</dc:title>
|
||||
<dc:subject>
|
||||
<rdf:Bag>
|
||||
<rdf:li>mat2</rdf:li>
|
||||
<rdf:li>logo</rdf:li>
|
||||
<rdf:li>metadata</rdf:li>
|
||||
</rdf:Bag>
|
||||
</dc:subject>
|
||||
<dc:date>2019 07 13</dc:date>
|
||||
<dc:rights>
|
||||
<cc:Agent>
|
||||
<dc:title>LGPL</dc:title>
|
||||
</cc:Agent>
|
||||
</dc:rights>
|
||||
<dc:publisher>
|
||||
<cc:Agent>
|
||||
<dc:title>jvoisin</dc:title>
|
||||
</cc:Agent>
|
||||
</dc:publisher>
|
||||
<dc:identifier>mat2-testdata-svg</dc:identifier>
|
||||
<dc:relation />
|
||||
<dc:language>English</dc:language>
|
||||
<dc:coverage />
|
||||
<dc:description>This is a test svg image for mat2's testsuite</dc:description>
|
||||
<dc:contributor>
|
||||
<cc:Agent>
|
||||
<dc:title>jvoisin, and Rose for the design</dc:title>
|
||||
</cc:Agent>
|
||||
</dc:contributor>
|
||||
</cc:Work>
|
||||
<cc:License
|
||||
rdf:about="http://creativecommons.org/licenses/by-sa/4.0/">
|
||||
<cc:permits
|
||||
rdf:resource="http://creativecommons.org/ns#Reproduction" />
|
||||
<cc:permits
|
||||
rdf:resource="http://creativecommons.org/ns#Distribution" />
|
||||
<cc:requires
|
||||
rdf:resource="http://creativecommons.org/ns#Notice" />
|
||||
<cc:requires
|
||||
rdf:resource="http://creativecommons.org/ns#Attribution" />
|
||||
<cc:permits
|
||||
rdf:resource="http://creativecommons.org/ns#DerivativeWorks" />
|
||||
<cc:requires
|
||||
rdf:resource="http://creativecommons.org/ns#ShareAlike" />
|
||||
</cc:License>
|
||||
</rdf:RDF>
|
||||
</metadata>
|
||||
<g
|
||||
id="layer1"
|
||||
inkscape:label="Icon"
|
||||
inkscape:groupmode="layer"
|
||||
style="display:inline"
|
||||
transform="translate(0,-172)">
|
||||
<g
|
||||
inkscape:groupmode="layer"
|
||||
id="layer2"
|
||||
inkscape:label="baseplate"
|
||||
style="display:none">
|
||||
<text
|
||||
xml:space="preserve"
|
||||
style="font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;font-size:5.33333349px;line-height:125%;font-family:Cantarell;-inkscape-font-specification:'Cantarell, Normal';text-align:start;writing-mode:lr-tb;text-anchor:start;display:inline;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.33264872;enable-background:new"
|
||||
x="7.9499588"
|
||||
y="148.65199"
|
||||
id="context"
|
||||
inkscape:label="context"><tspan
|
||||
sodipodi:role="line"
|
||||
id="tspan2716"
|
||||
x="7.9499588"
|
||||
y="148.65199"
|
||||
style="font-size:5.33333349px;stroke-width:0.33264872">apps</tspan></text>
|
||||
<text
|
||||
inkscape:label="icon-name"
|
||||
id="text3021"
|
||||
y="157.23398"
|
||||
x="7.7533054"
|
||||
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:5.33333349px;line-height:125%;font-family:Cantarell;-inkscape-font-specification:'Cantarell, Bold';text-align:start;writing-mode:lr-tb;text-anchor:start;display:inline;fill:#000000;fill-opacity:1;stroke:none;stroke-width:0.33264872;enable-background:new"
|
||||
xml:space="preserve"><tspan
|
||||
y="157.23398"
|
||||
x="7.7533054"
|
||||
id="tspan3023"
|
||||
sodipodi:role="line"
|
||||
style="font-size:5.33333349px;stroke-width:0.33264872">org.gnome.</tspan></text>
|
||||
<g
|
||||
style="display:inline;fill:#000000;enable-background:new"
|
||||
transform="matrix(7.9911709,0,0,8.0036407,-167.7909,-4846.0776)"
|
||||
id="g12027"
|
||||
inkscape:export-xdpi="12"
|
||||
inkscape:export-ydpi="12" />
|
||||
<rect
|
||||
style="display:inline;overflow:visible;visibility:visible;fill:#f0f0f0;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:0.5;marker:none;enable-background:accumulate"
|
||||
id="rect13805"
|
||||
width="128"
|
||||
height="128"
|
||||
x="9.2651362e-08"
|
||||
y="172"
|
||||
inkscape:label="512x512" />
|
||||
<g
|
||||
id="g883"
|
||||
style="fill:none;fill-opacity:0.25098039;stroke:#a579b3;stroke-opacity:1"
|
||||
transform="translate(-24,24)" />
|
||||
<g
|
||||
id="g900"
|
||||
style="fill:none;fill-opacity:0.25098039;stroke:#a579b3;stroke-opacity:1"
|
||||
transform="translate(-24,24)" />
|
||||
<g
|
||||
id="g1168"
|
||||
transform="matrix(0.25,0,0,0.25,6.9488522e-8,225)">
|
||||
<circle
|
||||
cx="256"
|
||||
cy="44"
|
||||
r="240"
|
||||
id="path1142"
|
||||
style="opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal" />
|
||||
<rect
|
||||
ry="32"
|
||||
rx="32"
|
||||
y="-180"
|
||||
x="96"
|
||||
height="448"
|
||||
width="319.99979"
|
||||
id="rect1110"
|
||||
style="opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal" />
|
||||
<rect
|
||||
ry="32"
|
||||
rx="32"
|
||||
y="-164"
|
||||
x="48"
|
||||
height="416"
|
||||
width="416"
|
||||
id="rect1110-8"
|
||||
style="display:inline;opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal;enable-background:new" />
|
||||
<rect
|
||||
ry="32"
|
||||
rx="32"
|
||||
y="-116"
|
||||
x="32"
|
||||
height="320"
|
||||
width="448"
|
||||
id="rect1110-8-9"
|
||||
style="display:inline;opacity:0.1;fill:#2864b0;fill-opacity:1;stroke:none;stroke-width:1;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-dashoffset:0;stroke-opacity:1;marker:none;marker-start:none;marker-mid:none;marker-end:none;paint-order:normal;enable-background:new" />
|
||||
</g>
|
||||
</g>
|
||||
<g
|
||||
inkscape:groupmode="layer"
|
||||
id="layer9"
|
||||
inkscape:label="hires"
|
||||
style="display:none" />
|
||||
<g
|
||||
id="g944"
|
||||
transform="matrix(1,0,0,0.93868822,0,14.545966)">
|
||||
<path
|
||||
style="fill:#99c1f1;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.41013032;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;stroke-opacity:1"
|
||||
d="m 50.53899,195.25817 6.396029,-11.43484 1.082405,-0.87215 4.821622,-10.46578 0.885604,-0.38763 2.558412,4.74837 2.755213,9.59364 1.672808,1.35667 3.542417,-0.87215 5.707227,12.59771 12.988859,9.59364 3.050415,3.87621 v 2.71335 l -16.334476,-1.25977 -7.084833,1.45359 -4.428021,-0.38763 -7.084833,0.29072 -11.414452,-0.58143 -3.640817,0.96905 -9.052843,-1.64739 -2.066409,0.0969 -1.476008,-0.48452 1.377607,-1.45358 1.869609,-1.06596 6.002428,-11.04722 1.279206,0.48453 5.412025,-6.49267 z"
|
||||
id="path3455"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#241f31;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 49.358184,215.31759 -3.444016,0.9206 -9.003641,-1.74429 -1.918809,0.24226 -1.623608,-0.58143 1.574407,-1.50204 1.722008,-0.96905 5.953228,-11.09567 1.279205,0.53298 5.510426,-6.54112 0.344401,0.29072 -4.969223,10.27197 2.214011,1.93811 -0.246001,4.45765 z"
|
||||
id="path3459"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#241f31;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 50.438601,195.22912 6.470906,-11.5803 1.113274,-0.6167 4.870575,-10.62099 0.904535,-0.41113 -0.417479,3.3576 0.626218,0.89079 0.834954,15.89722 1.391594,3.70021 -3.687722,5.34476 0.208739,1.37044 -0.347898,5.68737 1.87865,3.28908 7.375442,2.19272 1.252433,2.19272 -0.487057,0.13704 -4.244358,-0.54818 -6.540486,0.41114 -2.435287,-2.19272 -0.626216,-4.24839 -2.087389,-6.16703 -4.035619,-3.42612 -2.087388,-4.38544"
|
||||
id="path3461"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 32.069579,218.11563 c -0.06958,-0.27409 0.695796,-1.23341 0.695796,-1.23341 l 2.783185,-0.0685 1.739491,2.26124 4.661836,5.13919 0.139158,1.57602 -4.174778,5.96145 -0.487057,6.16703 -2.922344,2.26124 -0.06958,1.57601 h -1.113274 l -1.322013,-3.08351 2.017809,-14.86938 z"
|
||||
id="path3400"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 48.83827,222.43255 1.600331,-3.01499 -0.695796,-0.75375 -5.635951,-1.16488 -3.200663,0.82227 -0.06958,1.50749 1.53075,0.75375 1.461174,2.67237 -0.208739,1.71307 1.739489,1.02783 2.296129,-0.54818 z"
|
||||
id="path3402"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 51.203977,217.70449 1.113274,-0.68522 2.365707,1.02784 1.322013,2.67237 -2.226548,2.26125 -1.322013,-0.82227 -1.322013,-0.61671 0.834956,-1.71306 z"
|
||||
id="path3404"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 43.758957,226.61242 1.948228,0.68522 0.417479,1.91863 -0.626216,1.30193 -1.182854,0.34261 -1.113275,1.02784 -0.765376,3.63169 0.626218,3.01499 -1.252435,0.68522 -0.487057,-0.41113 -0.278319,-1.5075 -1.80907,-1.37045 -0.765376,-3.49464 3.618141,-3.42613 1.669912,-2.67237"
|
||||
id="path3406"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 50.57776,223.25481 0.13916,0.68523 -2.783187,3.83726 0.06958,1.64454 -0.626218,1.50749 -1.60033,1.43897 -0.06958,0.75375 1.600333,1.91863 1.182854,3.08351 0.974114,0.68523 1.669911,-2.80942 -0.278318,-3.22056 3.966039,-3.3576 0.695796,-1.09636 -3.270243,-4.45396 z"
|
||||
id="path3408"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 51.839954,236.39856 -0.834826,1.58948 0.166966,1.26061 1.057445,1.97315 0.500896,-0.32886 0.389584,-1.7539 1.447031,-1.151 2.337512,-4.0559 -0.22262,-1.04138 -1.947927,-1.69909 -2.114892,1.31542 0.278276,3.39819 z"
|
||||
id="path3410"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 57.593778,229.84236 -1.043694,1.09636 0.765375,0.89079 1.043695,-0.20556 v -1.43898 z"
|
||||
id="path3412"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 59.344793,218.25267 -0.765376,2.19272 -0.695796,0.27409 -0.695796,1.91863 -2.226548,2.26124 2.574446,3.56317 h 1.182854 l 0.487057,0.75375 0.626217,1.09636 1.948229,1.30193 2.922346,-0.6167 1.53075,-2.26125 -1.043694,-3.3576 -1.043693,-1.64454 1.322011,-2.60385 -0.904535,-1.37045 -2.226548,0.0685 z"
|
||||
id="path3416"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 72.150522,238.17554 -0.518261,1.78635 1.036524,2.16915 1.684349,-2.04155 -0.647826,-2.16915 z"
|
||||
id="path3418"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 66.789813,223.66595 1.600333,-0.75375 1.739489,-4.11135 2.922346,0.75375 1.322013,0.41114 0.139159,6.7152 -1.461172,1.02784 -2.226548,4.17987 -0.834956,-0.41114 -0.626216,0.95932 -2.574448,-0.61671 0.904537,-3.08351 z"
|
||||
id="path3422"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 77.505077,218.59529 1.182854,-0.20557 2.435287,1.30193 -0.974115,1.02783 -2.087389,3.63169 -1.391593,0.0685 -1.113274,-0.61671 1.043695,-2.19271 z"
|
||||
id="path3426"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 73.539038,231.06638 1.043695,-1.30193 1.043694,-2.80942 4.522676,1.71306 -0.974115,2.87795 -1.94823,-0.41114 -1.80907,1.09636 z"
|
||||
id="path3428"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 78.200873,225.6531 7.932079,-7.94861 3.339822,1.09636 0.974115,0.13705 1.600331,-1.02784 3.339822,0.0685 -5.079314,12.81371 -3.200663,-1.98715 0.139161,-1.16489 -0.695798,-0.6167 -0.208737,-1.16488 -1.043696,0.27409 -3.200663,2.39829 z"
|
||||
id="path3430"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 81.401536,230.99786 c 0,-0.2741 2.156968,-1.98716 2.156968,-1.98716 l 2.017811,1.30193 -0.904535,2.32976 -1.182855,0.75375 z"
|
||||
id="path3432"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 81.679855,237.8501 0.765375,-1.91863 0.208739,-1.2334 2.156969,0.20557 2.156968,-2.87795 3.409403,1.02784 -0.904535,2.80942 -0.904535,0.34261 -0.626218,2.80943 1.043694,4.72805 -0.904535,1.09636 -1.80907,-2.19272 -0.626217,-1.37045 z"
|
||||
id="path3434"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 78.131294,238.60385 0.626216,3.08351 -0.626216,3.22056 0.765375,0.95931 -0.626216,5.68737 2.504866,2.32976 1.87865,-0.47965 0.417478,-3.35761 1.669911,-0.0685 3.757301,-1.8501 -0.20874,-1.98716 -2.226548,-0.20556 -1.182854,-3.01499 -3.200662,-2.05568 -1.252434,-2.39828 z"
|
||||
id="path3436"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 84.532619,251.41755 -0.278318,1.43898 -0.695797,0.6167 1.322013,2.67238 2.365709,-0.20557 1.53075,-2.94647 -2.365707,-1.98715 z"
|
||||
id="path3438"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 64.993183,249.51154 -1.14959,2.51583 0.766392,1.69818 2.618509,0.25159 0.702526,1.19502 1.021857,2.39003 -0.574794,2.32714 3.89583,1.88688 0.95799,-1.06923 0.510928,-4.59139 -4.023561,-2.70451 -0.127732,-4.21402 z"
|
||||
id="path3440"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 72.912822,251.00642 h 1.391592 l 2.574446,0.75375 1.391593,1.98715 1.461172,1.30193 -0.139159,3.42612 -3.409402,1.57602 -0.974115,-1.85011 0.626217,-3.3576 -3.270243,-1.85011 z"
|
||||
id="path3442"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 72.147446,264.77944 1.80907,-1.98715 3.339822,-1.85011 1.322013,-0.0685 4.661835,-3.63169 1.391594,0.34261 0.556637,4.52248 -3.200664,4.04283 -2.852765,-0.82227 -1.80907,0.54818 -0.765376,1.43897 -2.087389,0.68522 z"
|
||||
id="path3444"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 75.06979,272.93361 0.765376,-1.30192 1.252433,-0.41114 0.904535,-2.87794 1.94823,-0.61671 0.556637,2.60386 -3.339822,6.0985 -1.391593,-0.0685 z"
|
||||
id="path3446"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 71.451649,268.20556 -1.252433,1.85011 2.504867,1.98715 0.765376,0.82227 1.73949,-2.39829 -2.296127,-2.80942 -1.461173,0.27409 z"
|
||||
id="path3448"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 62.24531,254.0948 1.461172,1.02784 1.948229,0.54818 0.487058,1.64454 -1.461173,2.67237 -0.06958,1.78159 -1.669911,1.85011 -1.252433,-2.05568 0.487057,-2.80942 -1.391593,-0.34261 -0.904535,-2.80942 z"
|
||||
id="path3450"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 47.585836,246.55246 -0.695796,3.70021 -0.139159,1.37045 1.87865,0.68523 1.391592,0.95931 1.809071,-1.64454 -0.417478,-0.95931 z"
|
||||
id="path3452"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 54.682958,247.78586 -1.043694,1.02784 0.208739,1.98715 1.600331,0.89079 0.626217,-0.47965 0.06958,-2.26125 z"
|
||||
id="path3454"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 48.629531,258.95503 4.800994,-6.16703 3.409402,0.82227 0.556637,1.78159 3.131083,4.79657 -1.669911,5.82441 -3.200663,-1.37045 -0.417478,-3.49464 -2.087388,1.30192 z"
|
||||
id="path3456"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 45.915924,252.71948 -0.487056,1.98715 1.60033,1.57602 1.461174,-0.20557 -0.347899,-2.19272 z"
|
||||
id="path3458"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 67.555189,261.6274 -1.80907,2.80943 -2.435287,8.42826 2.783185,3.76874 1.461172,-0.0685 1.113274,-2.12419 1.043696,-0.20557 0.487057,-1.09636 -1.043694,-4.45396 1.182853,-4.31692 z"
|
||||
id="path3460"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 58.718577,267.79443 1.600331,-1.23341 2.017809,1.71306 -0.904535,1.85011 z"
|
||||
id="path3462"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 58.509838,276.49678 2.156968,-4.591 1.391593,-0.27409 0.834955,1.50749 -2.017809,5.13919 z"
|
||||
id="path3464"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 71.242911,274.02997 1.391592,0.20557 1.043694,3.01499 2.01781,0.68522 1.530751,1.57602 -0.904535,2.87795 -2.365707,2.32976 -0.139159,3.56317 -1.322013,1.98715 -2.504867,-1.85011 -0.278318,-2.67237 -1.530752,-1.78159 -1.113274,-3.08351 3.61814,-4.17987 z"
|
||||
id="path3466"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 62.893354,276.5653 3.270244,1.16489 0.06958,3.70021 -0.556637,0.68523 0.974115,3.70021 1.252433,1.64454 0.06958,3.08351 -2.017809,1.37045 -2.574447,8.08566 -2.574447,-1.30193 -1.948229,-9.79872 z"
|
||||
id="path3468"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 58.440258,283.5546 h 0.556637 l 0.417478,0.95931 -0.208739,1.30193 -1.461172,0.13704 z"
|
||||
id="path3472"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 56.700767,279.16916 -1.113274,0.95931 0.834956,2.80943 1.600331,0.20556 0.487058,-2.05567 -0.695796,-1.91863 z"
|
||||
id="path3474"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 53.152207,272.17987 0.139159,5.13918 1.87865,1.23341 0.834955,-0.54818 0.904535,-3.63169 1.530752,-1.57602 -1.669911,-3.97431 -3.548561,3.08352 z"
|
||||
id="path3476"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 45.915924,258.33832 -0.208739,3.83726 -4.731414,3.97431 1.948229,2.80942 8.488716,0.82227 0.417478,1.98715 1.043694,-0.75375 0.487057,-2.19272 1.182854,-1.64454 -0.417478,-1.09635 -1.87865,-2.60386 -3.757299,-1.37045 -1.461174,-3.22056 z"
|
||||
id="path3480"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 40.279975,263.68308 1.669912,0.6167 3.061502,-6.37259 -0.904535,-5.61884 -2.504867,-0.34262 -1.391592,-1.2334 2.156968,-7.606 -2.087388,-4.45396 -3.409402,1.57602 -0.834956,3.42612 -1.87865,0.20557 -0.347898,2.1242 1.530752,1.64454 h 1.322013 l 0.626217,3.90578 2.296127,5.61884 -0.347898,2.19272 z"
|
||||
id="path3482"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 66.531337,247.61066 -0.590018,-0.31657 -0.420783,-1.71262 0.427793,-0.66945 1.306823,-1.13114 2.316342,-1.38746 1.06612,0.23465 -0.01701,2.21105 -2.36166,3.35302 z"
|
||||
id="path4284"
|
||||
inkscape:connector-curvature="0"
|
||||
inkscape:transform-center-x="4.9927099"
|
||||
inkscape:transform-center-y="-9.3161687" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 72.373733,232.22199 -0.815102,1.03206 4.017286,4.12827 1.571981,0.17201 1.339096,-0.86006 0.931544,0.63071 2.387083,-2.98152 -2.794634,-0.91739 -3.027519,0.22934 z"
|
||||
id="path3601"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 57.407878,237.1102 -1.301737,2.34289 -1.301738,0.61888 -0.17955,1.45878 -4.488748,1.54719 -0.403989,1.50299 0.314213,0.30944 1.032412,0.0884 v 1.41457 l 1.660839,1.50299 2.154598,-1.94504 1.571064,0.35364 2.738136,-1.94504 -1.436399,-2.56392 0.987525,-3.44803 -0.583538,-1.37037 z"
|
||||
id="path3603"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 62.104217,246.96003 5.843936,-6.55723 0.659867,-2.66044 2.221783,-0.40757 -0.386451,-3.39556 -2.000988,-0.60704 -6.246127,-0.36572 -2.624948,2.5137 1.519708,2.75102 -0.347742,5.51876 z"
|
||||
id="path3605"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 71.024647,249.63275 5.822153,1.31875 1.047988,-3.89891 -1.280874,-1.43343 0.523995,-6.02038 -3.551515,5.275 0.34933,2.06413 -2.037753,0.80272 -1.164431,0.45869 z"
|
||||
id="path3607"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 59.099222,247.24427 -2.095974,1.72011 -0.05822,1.60543 0.465772,1.72011 1.455539,0.97473 -0.407551,0.97473 2.328861,-0.34402 2.27064,-2.86685 -1.571981,-0.57337 -0.640437,-2.86685 -1.51376,-0.40136 z"
|
||||
id="path3609"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 44.078067,234.34346 0.291107,4.47228 -1.863089,1.43342 2.095976,3.72691 2.037753,0.0573 2.27064,-3.55489 -2.969297,-4.98831 z"
|
||||
id="path3611"
|
||||
inkscape:connector-curvature="0" />
|
||||
<path
|
||||
style="fill:#1a5fb4;fill-opacity:1;fill-rule:evenodd;stroke:none;stroke-width:0.13671011px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1"
|
||||
d="m 44.660282,245.46683 -3.318627,4.30027 1.339096,1.26141 2.561747,-0.28668 1.222652,-3.15354 z"
|
||||
id="path3613"
|
||||
inkscape:connector-curvature="0" />
|
||||
</g>
|
||||
</g>
|
||||
</svg>
|
After Width: | Height: | Size: 34 KiB |
BIN
tests/data/weird_producer.odt
Normal file
54
tests/fuzz.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import mimetypes
|
||||
import os
|
||||
import sys
|
||||
|
||||
sys.path.append('..')
|
||||
|
||||
import atheris
|
||||
|
||||
with atheris.instrument_imports(enable_loader_override=False):
|
||||
from libmat2 import parser_factory, UNSUPPORTED_EXTENSIONS
|
||||
|
||||
# Collect the file extensions of every mimetype declared by a libmat2 parser;
# TestOneInput picks one of them for each fuzz input.
extensions = set()
for parser in parser_factory._get_parsers():  # type: ignore
    for mtype in parser.mimetypes:
        if mtype.startswith('video'):
            continue
        # 'aif' also matches 'aifc', so no separate 'aifc' test is needed.
        if any(fragment in mtype for fragment in ('aif', 'wav', 'gif')):
            continue
        for extension in mimetypes.guess_all_extensions(mtype):
            if extension not in UNSUPPORTED_EXTENSIONS:
                extensions.add(extension)
# sorted() rather than list(): the iteration order of a set of strings can
# differ between interpreter runs (hash randomization), which would let the
# same fuzz input select a different extension from one run to the next.
extensions = sorted(extensions)
|
||||
|
||||
|
||||
|
||||
def TestOneInput(data):
    """Fuzz entry point: write the fuzz input to disk and run a parser on it.

    Part of `data` is consumed to choose one of the collected `extensions`;
    the remaining bytes become the content of `/tmp/mat2_fuzz<extension>`.
    A `ValueError` raised while parsing is ignored; any other exception
    propagates to the caller. The file is removed in every case.
    """
    fdp = atheris.FuzzedDataProvider(data)
    extension = fdp.PickValueInList(extensions)
    data = fdp.ConsumeBytes(sys.maxsize)

    fname = '/tmp/mat2_fuzz' + extension

    with open(fname, 'wb') as f:
        f.write(data)
    try:
        p, _ = parser_factory.get_parser(fname)
        if p:
            p.sandbox = False
            p.get_meta()
            p.remove_all()
            # NOTE(review): this builds a second parser for the *input* file
            # `fname`, not for whatever remove_all() produced - confirm that
            # this is the intended check.
            p, _ = parser_factory.get_parser(fname)
            p.get_meta()
    except ValueError:
        pass
    finally:
        # Delete the input file even when an unexpected exception escapes,
        # so a crashing input does not leave it behind in /tmp.
        os.remove(fname)


atheris.Setup(sys.argv, TestOneInput)
atheris.Fuzz()
|
@@ -1,100 +1,203 @@
|
||||
import random
|
||||
import os
|
||||
import shutil
|
||||
import stat
|
||||
import subprocess
|
||||
import unittest
|
||||
import glob
|
||||
|
||||
from libmat2 import images, parser_factory
|
||||
|
||||
|
||||
# Command prefix used to launch mat2 in every test of this module.
# Debian runs tests after installing the package, so when
# MAT2_GLOBAL_PATH_TESTSUITE is set the mat2 resolved by /usr/bin/env is used
# instead of the in-tree script:
# https://0xacab.org/jvoisin/mat2/issues/16#note_153878
if 'MAT2_GLOBAL_PATH_TESTSUITE' in os.environ:
    mat2_binary = ['/usr/bin/env', 'mat2']
else:
    mat2_binary = ['./mat2']
|
||||
|
||||
|
||||
class TestHelp(unittest.TestCase):
|
||||
def test_help(self):
|
||||
proc = subprocess.Popen(['./mat2', '--help'], stdout=subprocess.PIPE)
|
||||
proc = subprocess.Popen(mat2_binary + ['--help'], stdout=subprocess.PIPE)
|
||||
stdout, _ = proc.communicate()
|
||||
self.assertIn(b'usage: mat2 [-h] [-v] [-l] [-c | -s | -L] [files [files ...]]', stdout)
|
||||
self.assertIn(b'mat2 [-h] [-V]', stdout)
|
||||
self.assertIn(b'[--unknown-members policy]', stdout)
|
||||
self.assertIn(b'[--inplace]', stdout)
|
||||
self.assertIn(b'[--no-sandbox]', stdout)
|
||||
self.assertIn(b' [-v] [-l]', stdout)
|
||||
self.assertIn(b'[--check-dependencies]', stdout)
|
||||
self.assertIn(b'[-L | -s]', stdout)
|
||||
self.assertIn(b'[files ...]', stdout)
|
||||
|
||||
def test_no_arg(self):
|
||||
proc = subprocess.Popen(['./mat2'], stdout=subprocess.PIPE)
|
||||
proc = subprocess.Popen(mat2_binary, stdout=subprocess.PIPE)
|
||||
stdout, _ = proc.communicate()
|
||||
self.assertIn(b'usage: mat2 [-h] [-v] [-l] [-c | -s | -L] [files [files ...]]', stdout)
|
||||
self.assertIn(b'mat2 [-h] [-V]', stdout)
|
||||
self.assertIn(b'[--unknown-members policy]', stdout)
|
||||
self.assertIn(b'[--inplace]', stdout)
|
||||
self.assertIn(b'[--no-sandbox]', stdout)
|
||||
self.assertIn(b' [-v] [-l] [--check-dependencies] [-L | -s]', stdout)
|
||||
self.assertIn(b'[files ...]', stdout)
|
||||
|
||||
|
||||
class TestVersion(unittest.TestCase):
|
||||
def test_version(self):
|
||||
proc = subprocess.Popen(['./mat2', '--version'], stdout=subprocess.PIPE)
|
||||
proc = subprocess.Popen(mat2_binary + ['--version'], stdout=subprocess.PIPE)
|
||||
stdout, _ = proc.communicate()
|
||||
self.assertTrue(stdout.startswith(b'MAT2 '))
|
||||
self.assertTrue(stdout.startswith(b'mat2 '))
|
||||
|
||||
|
||||
class TestExclusiveArgs(unittest.TestCase):
|
||||
def test_version(self):
|
||||
proc = subprocess.Popen(['./mat2', '-s', '-c'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
||||
stdout, stderr = proc.communicate()
|
||||
self.assertIn(b'mat2: error: argument -c/--check: not allowed with argument -s/--show', stderr)
|
||||
class TestDependencies(unittest.TestCase):
    """Check the output of `mat2 --check-dependencies`."""

    def test_dependencies(self):
        proc = subprocess.Popen(mat2_binary + ['--check-dependencies'], stdout=subprocess.PIPE)
        stdout, _ = proc.communicate()
        # assertIn (rather than assertTrue(x in y)) shows the captured output
        # in the failure message.
        self.assertIn(b'mat2', stdout)
|
||||
|
||||
|
||||
class TestReturnValue(unittest.TestCase):
|
||||
def test_nonzero(self):
|
||||
ret = subprocess.call(['./mat2', './mat2'], stdout=subprocess.DEVNULL)
|
||||
ret = subprocess.call(mat2_binary + ['mat2'], stdout=subprocess.DEVNULL)
|
||||
self.assertEqual(255, ret)
|
||||
|
||||
ret = subprocess.call(['./mat2', '--whololo'], stderr=subprocess.DEVNULL)
|
||||
ret = subprocess.call(mat2_binary + ['--whololo'], stderr=subprocess.DEVNULL)
|
||||
self.assertEqual(2, ret)
|
||||
|
||||
def test_zero(self):
|
||||
ret = subprocess.call(['./mat2'], stdout=subprocess.DEVNULL)
|
||||
ret = subprocess.call(mat2_binary, stdout=subprocess.DEVNULL)
|
||||
self.assertEqual(0, ret)
|
||||
|
||||
ret = subprocess.call(['./mat2', '--show', './mat2'], stdout=subprocess.DEVNULL)
|
||||
ret = subprocess.call(mat2_binary + ['--show', 'mat2'], stdout=subprocess.DEVNULL)
|
||||
self.assertEqual(0, ret)
|
||||
|
||||
|
||||
class TestCleanFolder(unittest.TestCase):
    """Run mat2 on a directory instead of on a single file."""

    def test_jpg(self):
        folder = './tests/data/folder/'
        os.makedirs(folder, exist_ok=True)
        # Registered right away so the scratch folder is deleted even when an
        # assertion below fails, instead of leaking into later test runs.
        self.addCleanup(shutil.rmtree, folder)

        shutil.copy('./tests/data/dirty.jpg', folder + 'clean1.jpg')
        shutil.copy('./tests/data/dirty.jpg', folder + 'clean2.jpg')

        proc = subprocess.Popen(mat2_binary + ['--show', folder],
                                stdout=subprocess.PIPE)
        stdout, _ = proc.communicate()
        self.assertIn(b'Comment: Created with GIMP', stdout)

        proc = subprocess.Popen(mat2_binary + [folder],
                                stdout=subprocess.PIPE)
        proc.communicate()

        # Drop the two dirty inputs; whatever is left in the folder is what
        # the cleaning run above wrote.
        os.remove(folder + 'clean1.jpg')
        os.remove(folder + 'clean2.jpg')

        proc = subprocess.Popen(mat2_binary + ['--show', folder],
                                stdout=subprocess.PIPE)
        stdout, _ = proc.communicate()
        self.assertNotIn(b'Comment: Created with GIMP', stdout)
        self.assertIn(b'No metadata found', stdout)
|
||||
|
||||
|
||||
class TestCleanMeta(unittest.TestCase):
|
||||
def test_jpg(self):
|
||||
shutil.copy('./tests/data/dirty.jpg', './tests/data/clean.jpg')
|
||||
|
||||
proc = subprocess.Popen(['./mat2', '--show', './tests/data/clean.jpg'],
|
||||
proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/clean.jpg'],
|
||||
stdout=subprocess.PIPE)
|
||||
stdout, _ = proc.communicate()
|
||||
self.assertIn(b'Comment: Created with GIMP', stdout)
|
||||
|
||||
proc = subprocess.Popen(['./mat2', './tests/data/clean.jpg'],
|
||||
proc = subprocess.Popen(mat2_binary + ['./tests/data/clean.jpg'],
|
||||
stdout=subprocess.PIPE)
|
||||
stdout, _ = proc.communicate()
|
||||
|
||||
proc = subprocess.Popen(['./mat2', '--show', './tests/data/clean.cleaned.jpg'],
|
||||
proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/clean.cleaned.jpg'],
|
||||
stdout=subprocess.PIPE)
|
||||
stdout, _ = proc.communicate()
|
||||
self.assertNotIn(b'Comment: Created with GIMP', stdout)
|
||||
|
||||
os.remove('./tests/data/clean.jpg')
|
||||
|
||||
def test_jpg_nosandbox(self):
    """Show a dirty JPEG with --no-sandbox, clean it, and check the result.

    The cleaned copy must no longer report the GIMP comment.
    """
    dirty_copy = './tests/data/clean.jpg'
    cleaned_copy = './tests/data/clean.cleaned.jpg'
    shutil.copy('./tests/data/dirty.jpg', dirty_copy)

    # NOTE(review): only this first invocation passes --no-sandbox; the
    # cleaning run and the final --show below do not - confirm intended.
    show_before = subprocess.Popen(mat2_binary + ['--show', '--no-sandbox', dirty_copy],
                                   stdout=subprocess.PIPE)
    before, _ = show_before.communicate()
    self.assertIn(b'Comment: Created with GIMP', before)

    clean = subprocess.Popen(mat2_binary + [dirty_copy],
                             stdout=subprocess.PIPE)
    clean.communicate()

    show_after = subprocess.Popen(mat2_binary + ['--show', cleaned_copy],
                                  stdout=subprocess.PIPE)
    after, _ = show_after.communicate()
    self.assertNotIn(b'Comment: Created with GIMP', after)

    os.remove(dirty_copy)
    os.remove(cleaned_copy)
|
||||
|
||||
|
||||
class TestCopyPermissions(unittest.TestCase):
    """Check that the cleaned file ends up with the mode bits of its source."""

    def test_jpg_777(self):
        dirty_copy = './tests/data/clean.jpg'
        cleaned_copy = './tests/data/clean.cleaned.jpg'

        def discard(path):
            # A failing run may never have created the file.
            try:
                os.remove(path)
            except FileNotFoundError:
                pass

        # Registered up front so both files are removed even when an
        # assertion fails, instead of being left behind for later tests.
        self.addCleanup(discard, cleaned_copy)
        self.addCleanup(discard, dirty_copy)

        shutil.copy('./tests/data/dirty.jpg', dirty_copy)
        os.chmod(dirty_copy, 0o777)

        proc = subprocess.Popen(mat2_binary + ['--show', dirty_copy],
                                stdout=subprocess.PIPE)
        stdout, _ = proc.communicate()
        self.assertIn(b'Comment: Created with GIMP', stdout)

        proc = subprocess.Popen(mat2_binary + [dirty_copy],
                                stdout=subprocess.PIPE)
        proc.communicate()

        proc = subprocess.Popen(mat2_binary + ['--show', cleaned_copy],
                                stdout=subprocess.PIPE)
        stdout, _ = proc.communicate()
        self.assertNotIn(b'Comment: Created with GIMP', stdout)

        # 0o100777: regular-file type bit plus rwx for user, group and other.
        permissions = os.stat(cleaned_copy).st_mode
        self.assertEqual(permissions, 0o100777)
|
||||
|
||||
|
||||
class TestIsSupported(unittest.TestCase):
|
||||
def test_pdf(self):
|
||||
proc = subprocess.Popen(['./mat2', '--show', './tests/data/dirty.pdf'],
|
||||
proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/dirty.pdf'],
|
||||
stdout=subprocess.PIPE)
|
||||
stdout, _ = proc.communicate()
|
||||
self.assertNotIn(b"isn't supported", stdout)
|
||||
|
||||
class TestGetMeta(unittest.TestCase):
|
||||
maxDiff = None
|
||||
|
||||
def test_pdf(self):
|
||||
proc = subprocess.Popen(['./mat2', '--show', './tests/data/dirty.pdf'],
|
||||
proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/dirty.pdf'],
|
||||
stdout=subprocess.PIPE)
|
||||
stdout, _ = proc.communicate()
|
||||
self.assertIn(b'producer: pdfTeX-1.40.14', stdout)
|
||||
self.assertIn(b'Producer: pdfTeX-1.40.14', stdout)
|
||||
|
||||
def test_png(self):
|
||||
proc = subprocess.Popen(['./mat2', '--show', './tests/data/dirty.png'],
|
||||
proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/dirty.png'],
|
||||
stdout=subprocess.PIPE)
|
||||
stdout, _ = proc.communicate()
|
||||
self.assertIn(b'Comment: This is a comment, be careful!', stdout)
|
||||
|
||||
def test_jpg(self):
|
||||
proc = subprocess.Popen(['./mat2', '--show', './tests/data/dirty.jpg'],
|
||||
proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/dirty.jpg'],
|
||||
stdout=subprocess.PIPE)
|
||||
stdout, _ = proc.communicate()
|
||||
self.assertIn(b'Comment: Created with GIMP', stdout)
|
||||
|
||||
def test_docx(self):
|
||||
proc = subprocess.Popen(['./mat2', '--show', './tests/data/dirty.docx'],
|
||||
proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/dirty.docx'],
|
||||
stdout=subprocess.PIPE)
|
||||
stdout, _ = proc.communicate()
|
||||
self.assertIn(b'Application: LibreOffice/5.4.5.1$Linux_X86_64', stdout)
|
||||
@@ -102,7 +205,7 @@ class TestGetMeta(unittest.TestCase):
|
||||
self.assertIn(b'revision: 1', stdout)
|
||||
|
||||
def test_odt(self):
|
||||
proc = subprocess.Popen(['./mat2', '--show', './tests/data/dirty.odt'],
|
||||
proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/dirty.odt'],
|
||||
stdout=subprocess.PIPE)
|
||||
stdout, _ = proc.communicate()
|
||||
self.assertIn(b'generator: LibreOffice/3.3$Unix', stdout)
|
||||
@@ -110,25 +213,126 @@ class TestGetMeta(unittest.TestCase):
|
||||
self.assertIn(b'date_time: 2011-07-26 02:40:16', stdout)
|
||||
|
||||
def test_mp3(self):
|
||||
proc = subprocess.Popen(['./mat2', '--show', './tests/data/dirty.mp3'],
|
||||
proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/dirty.mp3'],
|
||||
stdout=subprocess.PIPE)
|
||||
stdout, _ = proc.communicate()
|
||||
self.assertIn(b'TALB: harmfull', stdout)
|
||||
self.assertIn(b'COMM::: Thank you for using MAT !', stdout)
|
||||
|
||||
def test_flac(self):
|
||||
proc = subprocess.Popen(['./mat2', '--show', './tests/data/dirty.flac'],
|
||||
stdout=subprocess.PIPE)
|
||||
proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/dirty.flac'],
|
||||
stdout=subprocess.PIPE, bufsize=0)
|
||||
stdout, _ = proc.communicate()
|
||||
self.assertIn(b'comments: Thank you for using MAT !', stdout)
|
||||
self.assertIn(b'genre: Python', stdout)
|
||||
self.assertIn(b'title: I am so', stdout)
|
||||
|
||||
def test_ogg(self):
|
||||
proc = subprocess.Popen(['./mat2', '--show', './tests/data/dirty.ogg'],
|
||||
proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/dirty.ogg'],
|
||||
stdout=subprocess.PIPE)
|
||||
stdout, _ = proc.communicate()
|
||||
self.assertIn(b'comments: Thank you for using MAT !', stdout)
|
||||
self.assertIn(b'genre: Python', stdout)
|
||||
self.assertIn(b'i am a : various comment', stdout)
|
||||
self.assertIn(b'artist: jvoisin', stdout)
|
||||
|
||||
#def test_webp(self):
|
||||
# proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/dirty.webp'],
|
||||
# stdout=subprocess.PIPE)
|
||||
# stdout, _ = proc.communicate()
|
||||
# self.assertIn(b'Warning: [minor] Improper EXIF header', stdout)
|
||||
|
||||
class TestControlCharInjection(unittest.TestCase):
    """Check what `--show` prints for a file carrying control characters."""

    def test_jpg(self):
        # The comment of control_chars.jpg must show up as plain `GQ`.
        command = mat2_binary + ['--show', './tests/data/control_chars.jpg']
        child = subprocess.Popen(command, stdout=subprocess.PIPE)
        output, _ = child.communicate()
        self.assertIn(b'Comment: GQ\n', output)
|
||||
|
||||
|
||||
class TestCommandLineParallel(unittest.TestCase):
    """Hand many files to a single mat2 command-line invocation."""

    # Number of copies of each sample passed to one mat2 run.
    iterations = 24

    def test_same(self):
        # Clean `iterations` copies of the same JPEG in one run.
        for i in range(self.iterations):
            shutil.copy('./tests/data/dirty.jpg', './tests/data/dirty_%d.jpg' % i)

        proc = subprocess.Popen(mat2_binary + ['./tests/data/dirty_%d.jpg' % i for i in range(self.iterations)],
                                stdout=subprocess.PIPE)
        proc.communicate()

        for i in range(self.iterations):
            path = './tests/data/dirty_%d.jpg' % i
            p = images.JPGParser('./tests/data/dirty_%d.cleaned.jpg' % i)
            self.assertEqual(p.get_meta(), {})
            os.remove('./tests/data/dirty_%d.cleaned.jpg' % i)
            os.remove(path)

    def test_different(self):
        # Clean every `dirty.*` sample, of whatever type, in one run.
        src = './tests/data/'
        dst = './tests/data/parallel'
        shutil.copytree(src, dst)
        # Registered right away so the copy is removed even when an
        # assertion below fails.
        self.addCleanup(shutil.rmtree, './tests/data/parallel/')

        proc = subprocess.Popen(mat2_binary + glob.glob('./tests/data/parallel/dirty.*'),
                                stdout=subprocess.PIPE)
        proc.communicate()

        # The pattern used to start with `./test/` (missing "s"), so it never
        # matched anything and the loop below was dead code.
        cleaned = glob.glob('./tests/data/parallel/dirty.cleaned.*')
        self.assertTrue(cleaned)
        for i in cleaned:
            p, mime = parser_factory.get_parser(i)
            self.assertIsNotNone(mime)
            self.assertIsNotNone(p)
            # NOTE(review): the former follow-up re-parsed `p.output_filename`
            # (a `*.cleaned.cleaned.*` path that is never created) and called
            # `.get_meta()` on the returned tuple, so it could not have
            # passed. It is dropped here; asserting an empty dict for every
            # format also looks wrong, since other tests in this suite expect
            # residual fields for cleaned pdf/tiff files. TODO confirm the
            # intended per-format expectation.

    def test_faulty(self):
        # Mix valid JPEGs with torrents disguised as .docx; the JPEGs must
        # still come out clean.
        for i in range(self.iterations):
            shutil.copy('./tests/data/dirty.jpg', './tests/data/dirty_%d.jpg' % i)
            shutil.copy('./tests/data/dirty.torrent', './tests/data/dirty_%d.docx' % i)

        to_process = ['./tests/data/dirty_%d.jpg' % i for i in range(self.iterations)]
        to_process.extend(['./tests/data/dirty_%d.docx' % i for i in range(self.iterations)])
        random.shuffle(to_process)
        proc = subprocess.Popen(mat2_binary + to_process,
                                stdout=subprocess.PIPE)
        proc.communicate()

        for i in range(self.iterations):
            path = './tests/data/dirty_%d.jpg' % i
            p = images.JPGParser('./tests/data/dirty_%d.cleaned.jpg' % i)
            self.assertEqual(p.get_meta(), {})
            os.remove('./tests/data/dirty_%d.cleaned.jpg' % i)
            os.remove(path)
            os.remove('./tests/data/dirty_%d.docx' % i)
|
||||
|
||||
|
||||
class TestInplaceCleaning(unittest.TestCase):
    """Exercise the `--inplace` switch of the mat2 command line."""

    def test_cleaning(self):
        # Clean a single JPEG in place, then check that `--show` reports
        # no metadata for it.
        shutil.copy('./tests/data/dirty.jpg', './tests/data/clean.jpg')
        proc = subprocess.Popen(mat2_binary + ['--inplace', './tests/data/clean.jpg'],
                                stdout=subprocess.PIPE)
        proc.communicate()
        proc = subprocess.Popen(mat2_binary + ['--show', './tests/data/clean.jpg'],
                                stdout=subprocess.PIPE)
        stdout, _ = proc.communicate()
        self.assertIn(b' No metadata found in ./tests/data/clean.jpg.\n', stdout)
        os.remove('./tests/data/clean.jpg')

    def test_cleaning_multiple_one_fails(self):
        # Nine valid JPEGs plus one torrent disguised as a JPEG: the valid
        # ones must still be cleaned.
        files = ['./tests/data/clean_%d.jpg' % i for i in range(9)]
        for f in files:
            shutil.copy('./tests/data/dirty.jpg', f)
        faulty = './tests/data/clean_9.jpg'
        shutil.copy('./tests/data/dirty.torrent', faulty)

        # The faulty file used to be created but left out of the command
        # line, so the "one fails" scenario was never actually exercised.
        proc = subprocess.Popen(mat2_binary + ['--inplace'] + files + [faulty],
                                stdout=subprocess.PIPE)
        proc.communicate()

        for f in files:
            p = images.JPGParser(f)
            meta = p.get_meta()
            self.assertEqual(meta, {})

        for i in range(10):
            os.remove('./tests/data/clean_%d.jpg' % i)
|
||||
|
||||
|
471
tests/test_corrupted_files.py
Normal file
@@ -0,0 +1,471 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import unittest
|
||||
import stat
|
||||
import time
|
||||
import shutil
|
||||
import os
|
||||
import logging
|
||||
import zipfile
|
||||
import tarfile
|
||||
|
||||
from libmat2 import pdf, images, audio, office, parser_factory, torrent
|
||||
from libmat2 import harmless, video, web, archive
|
||||
|
||||
# No need to log messages: should something go wrong,
# the testsuite _will_ fail.
logger = logging.getLogger(__name__)
logger.setLevel(logging.FATAL)
|
||||
|
||||
|
||||
class TestInexistentFiles(unittest.TestCase):
    """Paths for which `get_parser` must return neither parser nor mimetype."""

    def _assert_no_parser(self, path):
        """Assert that `parser_factory.get_parser(path)` is `(None, None)`."""
        parser, mimetype = parser_factory.get_parser(path)
        self.assertEqual(mimetype, None)
        self.assertEqual(parser, None)

    def test_ro(self):
        self._assert_no_parser('/etc/passwd')

    def test_notaccessible(self):
        self._assert_no_parser('/etc/shadow')

    def test_folder(self):
        self._assert_no_parser('./tests/')

    def test_inexistingfile(self):
        self._assert_no_parser('./tests/NONEXISTING_FILE')

    def test_chardevice(self):
        self._assert_no_parser('/dev/zero')

    def test_brokensymlink(self):
        # Create a link, delete the file it was created for, and make sure
        # the dangling link is refused.
        link = './tests/SYMLINK'
        shutil.copy('./tests/test_libmat2.py', './tests/clean.py')
        os.symlink('./tests/clean.py', link)
        os.remove('./tests/clean.py')
        self._assert_no_parser(link)
        os.unlink(link)
|
||||
|
||||
class TestUnsupportedFiles(unittest.TestCase):
    """A Python source file has a known mimetype but no parser."""

    def test_pdf(self):
        # Parse the very file that was copied: the test used to copy to
        # ./tests/clean.py and then query ./tests/data/clean.py.
        target = './tests/clean.py'
        shutil.copy('./tests/test_libmat2.py', target)
        parser, mimetype = parser_factory.get_parser(target)
        self.assertEqual(mimetype, 'text/x-python')
        self.assertEqual(parser, None)
        os.remove(target)
|
||||
|
||||
class TestCorruptedEmbedded(unittest.TestCase):
    """Office documents shipped as `embedded_corrupted.*` samples."""

    def test_docx(self):
        # Both cleaning and metadata listing must raise ValueError.
        target = './tests/data/clean.docx'
        shutil.copy('./tests/data/embedded_corrupted.docx', target)
        parser, _ = parser_factory.get_parser(target)
        with self.assertRaises(ValueError):
            parser.remove_all()
        with self.assertRaises(ValueError):
            self.assertIsNotNone(parser.get_meta())
        os.remove(target)

    def test_odt(self):
        # Cleaning must report failure while metadata is still returned.
        target = './tests/data/clean.odt'
        shutil.copy('./tests/data/embedded_corrupted.odt', target)
        parser, _ = parser_factory.get_parser(target)
        self.assertFalse(parser.remove_all())
        self.assertTrue(parser.get_meta())
        os.remove(target)
|
||||
|
||||
|
||||
class TestExplicitelyUnsupportedFiles(unittest.TestCase):
    """A Python file inside the data directory yields a mimetype only."""

    def test_pdf(self):
        target = './tests/data/clean.py'
        shutil.copy('./tests/test_libmat2.py', target)
        parser, mimetype = parser_factory.get_parser(target)
        self.assertEqual(mimetype, 'text/x-python')
        self.assertEqual(parser, None)
        os.remove(target)
|
||||
|
||||
|
||||
class TestWrongContentTypesFileOffice(unittest.TestCase):
    """docx samples whose content-types file is malformed, broken or absent."""

    def _assert_docx_rejected(self, sample):
        """Copy `sample` to clean.docx and expect `MSOfficeParser` to raise
        ValueError on it; the copy is deleted afterwards."""
        target = './tests/data/clean.docx'
        shutil.copy(sample, target)
        with self.assertRaises(ValueError):
            office.MSOfficeParser(target)
        os.remove(target)

    def test_office_incomplete(self):
        self._assert_docx_rejected('./tests/data/malformed_content_types.docx')

    def test_office_broken(self):
        self._assert_docx_rejected('./tests/data/broken_xml_content_types.docx')

    def test_office_absent(self):
        self._assert_docx_rejected('./tests/data/no_content_types.docx')
|
||||
|
||||
class TestCorruptedFiles(unittest.TestCase):
    """Give parsers content that does not match the type they are built for."""

    def _assert_rejected(self, factory, source, target):
        """Copy `source` to `target`, expect `factory(target)` to raise
        ValueError, then delete `target`."""
        shutil.copy(source, target)
        with self.assertRaises(ValueError):
            factory(target)
        os.remove(target)

    def _skip_without_ffmpeg(self):
        """Skip the running test when `video._get_ffmpeg_path()` raises
        RuntimeError."""
        try:
            video._get_ffmpeg_path()
        except RuntimeError:
            raise unittest.SkipTest

    def _assert_avi_not_cleaned(self, target):
        """Copy a torrent to `target` and check that `AVIParser.remove_all`
        returns a falsy value for it; `target` is deleted afterwards."""
        shutil.copy('./tests/data/dirty.torrent', target)
        p = video.AVIParser(target)
        self.assertFalse(p.remove_all())
        os.remove(target)

    def test_pdf(self):
        self._assert_rejected(pdf.PDFParser,
                              './tests/data/dirty.png', './tests/data/clean.png')

    def test_png(self):
        self._assert_rejected(images.PNGParser,
                              './tests/data/dirty.pdf', './tests/data/clean.pdf')

    def test_png2(self):
        self._assert_rejected(parser_factory.get_parser,
                              './tests/test_libmat2.py', './tests/clean.png')

    def test_torrent(self):
        target = "./tests/data/clean.torrent"

        # A PNG is not a torrent.
        shutil.copy('./tests/data/dirty.png', target)
        with self.assertRaises(ValueError):
            torrent.TorrentParser(target)

        # Neither is a PNG followed by text.
        with open(target, "a") as f:
            f.write("trailing garbage")
        with self.assertRaises(ValueError):
            torrent.TorrentParser(target)

        # Each of these payloads replaces the whole file and must be refused.
        for payload in ("i-0e", "i00e", "01:AAAAAAAAA", "1:aaa"):
            with open(target, "w") as f:
                f.write(payload)
            with self.assertRaises(ValueError):
                torrent.TorrentParser(target)

        os.remove(target)

    def test_odg(self):
        self._assert_rejected(office.LibreOfficeParser,
                              './tests/data/dirty.png', './tests/data/clean.odg')

    def test_bmp(self):
        # The harmless parser is expected to accept this file.
        target = './tests/data/clean.bmp'
        shutil.copy('./tests/data/dirty.png', target)
        ret = harmless.HarmlessParser(target)
        self.assertIsNotNone(ret)
        os.remove(target)

    def test_docx(self):
        self._assert_rejected(office.MSOfficeParser,
                              './tests/data/dirty.png', './tests/data/clean.docx')

    def test_flac(self):
        self._assert_rejected(audio.FLACParser,
                              './tests/data/dirty.png', './tests/data/clean.flac')

    def test_mp3(self):
        self._assert_rejected(audio.MP3Parser,
                              './tests/data/dirty.png', './tests/data/clean.mp3')

    def test_wrong_tif(self):
        # A .tiff sample stored under the .tif extension must still clean.
        target = './tests/data/clean.tif'
        cleaned = './tests/data/clean.cleaned.tif'
        shutil.copy('./tests/data/dirty.tiff', target)
        p = images.TiffParser(target)
        p.remove_all()
        p = images.TiffParser(cleaned)
        self.assertEqual(p.get_meta(), {})
        os.remove(target)
        os.remove(cleaned)

    def test_jpg(self):
        self._assert_rejected(images.JPGParser,
                              './tests/data/dirty.mp3', './tests/data/clean.jpg')

    def test_png_lightweight(self):
        self._assert_rejected(images.PNGParser,
                              './tests/data/dirty.torrent', './tests/data/clean.png')

    def test_avi(self):
        self._skip_without_ffmpeg()
        self._assert_avi_not_cleaned('./tests/data/clean.avi')

    def test_avi_injection(self):
        # Same as test_avi, with a file name that looks like an option.
        self._skip_without_ffmpeg()
        self._assert_avi_not_cleaned('./tests/data/--output.avi')

    def test_zip(self):
        target = './tests/data/clean.zip'
        members = (
            './tests/data/dirty.flac',
            './tests/data/dirty.docx',
            './tests/data/dirty.jpg',
            './tests/data/embedded_corrupted.docx',
        )
        with zipfile.ZipFile(target, 'w') as zout:
            for member in members:
                zout.write(member)
        p, mimetype = parser_factory.get_parser(target)
        self.assertEqual(mimetype, 'application/zip')
        with self.assertRaises(ValueError):
            p.get_meta()
        with self.assertRaises(ValueError):
            self.assertFalse(p.remove_all())
        os.remove(target)

    def test_html(self):
        sample = './tests/data/dirty.html'
        target = './tests/data/clean.html'
        cleaned = './tests/data/clean.cleaned.html'

        # Mismatched tags appended to a valid document.
        shutil.copy(sample, target)
        with open(target, 'a') as f:
            f.write('<open>but not</closed>')
        with self.assertRaises(ValueError):
            web.HTMLParser(target)
        os.remove(target)

        # Yes, we're able to deal with malformed html :/
        shutil.copy(sample, target)
        with open(target, 'a') as f:
            f.write('<meta name=\'this" is="weird"/>')
        p = web.HTMLParser(target)
        self.assertTrue(p.remove_all())
        p = web.HTMLParser(cleaned)
        self.assertEqual(p.get_meta(), {})
        os.remove(target)
        os.remove(cleaned)

        # A lone closing tag.
        with open(target, 'w') as f:
            f.write('</meta>')
        with self.assertRaises(ValueError):
            web.HTMLParser(target)
        os.remove(target)

        # These two documents construct fine but fail on use; a fresh
        # parser is built before each of the two calls.
        for document in ('<meta><a>test</a><set/></meta><title></title><meta>',
                         '<doctitle><br/></doctitle><br/><notclosed>'):
            with open(target, 'w') as f:
                f.write(document)
            p = web.HTMLParser(target)
            with self.assertRaises(ValueError):
                p.get_meta()
            p = web.HTMLParser(target)
            with self.assertRaises(ValueError):
                p.remove_all()
            os.remove(target)

    def test_epub(self):
        # An epub whose content.opf member is really a JPEG.
        target = './tests/data/clean.epub'
        with zipfile.ZipFile(target, 'w') as zout:
            zout.write('./tests/data/dirty.jpg', 'OEBPS/content.opf')
        p, mimetype = parser_factory.get_parser(target)
        self.assertEqual(mimetype, 'application/epub+zip')
        meta = p.get_meta()
        self.assertEqual(meta['OEBPS/content.opf']['OEBPS/content.opf'],
                         'harmful content')

        self.assertFalse(p.remove_all())
        os.remove(target)

    def test_tar(self):
        target = './tests/data/clean.tar'
        members = (
            './tests/data/dirty.flac',
            './tests/data/dirty.docx',
            './tests/data/dirty.jpg',
            './tests/data/embedded_corrupted.docx',
        )
        with tarfile.TarFile.open(target, 'w') as zout:
            for member in members:
                zout.add(member)
            # One extra member with hand-written header fields.
            tarinfo = tarfile.TarInfo(name='./tests/data/dirty.png')
            tarinfo.mtime = time.time()
            tarinfo.uid = 1337
            tarinfo.gid = 1338
            tarinfo.size = os.stat('./tests/data/dirty.png').st_size
            with open('./tests/data/dirty.png', 'rb') as f:
                zout.addfile(tarinfo, f)
        p, mimetype = parser_factory.get_parser(target)
        self.assertEqual(mimetype, 'application/x-tar')
        with self.assertRaises(ValueError):
            p.get_meta()
        with self.assertRaises(ValueError):
            self.assertFalse(p.remove_all())
        os.remove(target)

        # A PNG renamed to .tar is refused at construction time.
        shutil.copy('./tests/data/dirty.png', target)
        with self.assertRaises(ValueError):
            archive.TarParser(target)
        os.remove(target)
|
||||
|
||||
class TestReadOnlyArchiveMembers(unittest.TestCase):
    """Tar archive containing a member whose mode is 0o000."""

    def test_onlymember_tar(self):
        target = './tests/data/clean.tar'
        cleaned = './tests/data/clean.cleaned.tar'
        jpg = './tests/data/dirty.jpg'

        with tarfile.open(target, 'w') as zout:
            zout.add('./tests/data/dirty.png')
            # Second member: header written by hand, no permission bits.
            tarinfo = tarfile.TarInfo(jpg)
            tarinfo.mtime = time.time()
            tarinfo.uid = 1337
            tarinfo.gid = 0
            tarinfo.mode = 0o000
            tarinfo.size = os.stat(jpg).st_size
            with open(jpg, 'rb') as f:
                zout.addfile(tarinfo=tarinfo, fileobj=f)

        p, mimetype = parser_factory.get_parser(target)
        self.assertEqual(mimetype, 'application/x-tar')
        meta = p.get_meta()
        self.assertEqual(meta[jpg]['uid'], '1337')
        self.assertTrue(p.remove_all())

        p = archive.TarParser(cleaned)
        self.assertEqual(p.get_meta(), {})
        os.remove(target)
        os.remove(cleaned)
|
||||
|
||||
|
||||
class TestPathTraversalArchiveMembers(unittest.TestCase):
    """Tar archives that `archive.TarParser` must refuse (one it accepts)."""

    TAR = './tests/data/clean.tar'

    def _write_tar(self, tweak):
        """Write `TAR` with dirty.png plus a hand-built dirty.jpg member.

        `tweak` receives the `TarInfo` of the second member and may alter
        it before the member is added.
        """
        with tarfile.open(self.TAR, 'w') as zout:
            zout.add('./tests/data/dirty.png')
            tarinfo = tarfile.TarInfo('./tests/data/dirty.jpg')
            tweak(tarinfo)
            with open('./tests/data/dirty.jpg', 'rb') as f:
                zout.addfile(tarinfo=tarinfo, fileobj=f)

    def _assert_rejected(self, path):
        """Expect `TarParser(path)` to raise ValueError, then delete `path`."""
        with self.assertRaises(ValueError):
            archive.TarParser(path)
        os.remove(path)

    def test_tar_traversal(self):
        def rename(tarinfo):
            tarinfo.name = '../../../../../../../../../../tmp/mat2_test.png'
        self._write_tar(rename)
        self._assert_rejected(self.TAR)

    def test_tar_absolute_path(self):
        def rename(tarinfo):
            tarinfo.name = '/etc/passwd'
        self._write_tar(rename)
        self._assert_rejected(self.TAR)

    def test_tar_duplicate_file(self):
        # The same two members are written three times over.
        with tarfile.open(self.TAR, 'w') as zout:
            for _ in range(3):
                zout.add('./tests/data/dirty.png')
                tarinfo = tarfile.TarInfo('./tests/data/dirty.jpg')
                with open('./tests/data/dirty.jpg', 'rb') as f:
                    zout.addfile(tarinfo=tarinfo, fileobj=f)
        self._assert_rejected(self.TAR)

    def test_tar_setuid(self):
        def add_bit(tarinfo):
            tarinfo.mode |= stat.S_ISUID
        self._write_tar(add_bit)
        self._assert_rejected(self.TAR)

    def test_tar_setgid(self):
        def add_bit(tarinfo):
            tarinfo.mode |= stat.S_ISGID
        self._write_tar(add_bit)
        self._assert_rejected(self.TAR)

    def test_tar_symlink_absolute(self):
        link = './tests/data/symlink'
        os.symlink('/etc/passwd', link)
        with tarfile.open(self.TAR, 'w') as zout:
            zout.add(link)
            tarinfo = tarfile.TarInfo(link)
            tarinfo.linkname = '/etc/passwd'
            tarinfo.type = tarfile.SYMTYPE
            with open('./tests/data/dirty.jpg', 'rb') as f:
                zout.addfile(tarinfo=tarinfo, fileobj=f)
        self._assert_rejected(self.TAR)
        os.remove(link)

    def test_tar_symlink_ok(self):
        # Directory entry plus a link to a file shipped in the same
        # archive: construction must not raise.
        copy = './tests/data/clean.png'
        shutil.copy('./tests/data/dirty.png', copy)
        with tarfile.open(self.TAR, 'w') as zout:
            zout.add('./tests/data/dirty.png')

            directory = tarfile.TarInfo('mydir')
            directory.type = tarfile.DIRTYPE
            zout.addfile(directory)

            zout.add(copy)

            link = tarfile.TarInfo('mylink')
            link.type = tarfile.SYMTYPE
            link.linkname = './tests/data/clean.png'
            zout.addfile(link)

            zout.add('./tests/data/dirty.jpg')
        archive.TarParser(self.TAR)
        os.remove(self.TAR)
        os.remove(copy)

    def test_tar_symlink_relative(self):
        link = './tests/data/symlink'
        os.symlink('../../../etc/passwd', link)
        with tarfile.open(self.TAR, 'w') as zout:
            zout.add(link)
            tarinfo = tarfile.TarInfo(link)
            with open('./tests/data/dirty.jpg', 'rb') as f:
                zout.addfile(tarinfo=tarinfo, fileobj=f)
        self._assert_rejected(self.TAR)
        os.remove(link)

    def test_tar_device_file(self):
        with tarfile.open(self.TAR, 'w') as zout:
            zout.add('/dev/null')
        self._assert_rejected(self.TAR)

    def test_tar_hardlink(self):
        original = './tests/data/clean.png'
        twin = './tests/data/hardlink.png'
        shutil.copy('./tests/data/dirty.png', original)
        os.link(original, twin)
        with tarfile.open('./tests/data/cleaner.tar', 'w') as zout:
            zout.add('tests/data/clean.png')
            zout.add('tests/data/hardlink.png')
        self._assert_rejected('./tests/data/cleaner.tar')
        os.remove(original)
        os.remove(twin)
|
170
tests/test_deep_cleaning.py
Normal file
@@ -0,0 +1,170 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import unittest
|
||||
import shutil
|
||||
import os
|
||||
import zipfile
|
||||
import tempfile
|
||||
|
||||
from libmat2 import office, parser_factory
|
||||
|
||||
class TestZipMetadata(unittest.TestCase):
    """Check zip-level and embedded-file metadata of cleaned documents."""

    def __check_deep_meta(self, p):
        """Extract `p.filename` and assert that every member for which a
        parser exists reports empty metadata.

        The archive handle and the scratch directory are released through
        context managers, so neither leaks when an assertion fails.
        """
        with tempfile.TemporaryDirectory() as tempdir:
            with zipfile.ZipFile(p.filename) as zipin:
                zipin.extractall(tempdir)

            for subdir, _dirs, files in os.walk(tempdir):
                for f in files:
                    complete_path = os.path.join(subdir, f)
                    inside_p, _ = parser_factory.get_parser(complete_path)
                    if inside_p is None:
                        # No parser for this member: nothing to check.
                        continue
                    self.assertEqual(inside_p.get_meta(), {})

    def __check_zip_meta(self, p):
        """Assert that every zip entry of `p.filename` has an empty comment,
        the 1980-01-01 timestamp and UNIX as creating system."""
        with zipfile.ZipFile(p.filename) as zipin:
            for item in zipin.infolist():
                self.assertEqual(item.comment, b'')
                self.assertEqual(item.date_time, (1980, 1, 1, 0, 0, 0))
                self.assertEqual(item.create_system, 3)  # 3 is UNIX

    def test_office(self):
        shutil.copy('./tests/data/dirty.docx', './tests/data/clean.docx')
        p = office.MSOfficeParser('./tests/data/clean.docx')

        meta = p.get_meta()
        self.assertIsNotNone(meta)
        self.assertEqual(meta['word/media/image1.png']['Comment'], 'This is a comment, be careful!')

        ret = p.remove_all()
        self.assertTrue(ret)

        p = office.MSOfficeParser('./tests/data/clean.cleaned.docx')
        self.assertEqual(p.get_meta(), {})

        self.__check_zip_meta(p)
        self.__check_deep_meta(p)

        os.remove('./tests/data/clean.docx')
        os.remove('./tests/data/clean.cleaned.docx')

    def test_libreoffice(self):
        shutil.copy('./tests/data/dirty.odt', './tests/data/clean.odt')
        p = office.LibreOfficeParser('./tests/data/clean.odt')

        meta = p.get_meta()
        self.assertIsNotNone(meta)

        ret = p.remove_all()
        self.assertTrue(ret)

        p = office.LibreOfficeParser('./tests/data/clean.cleaned.odt')
        self.assertEqual(p.get_meta(), {})

        self.__check_zip_meta(p)
        self.__check_deep_meta(p)

        os.remove('./tests/data/clean.odt')
        os.remove('./tests/data/clean.cleaned.odt')
|
||||
|
||||
|
||||
class TestZipOrder(unittest.TestCase):
    """Check that cleaning an odt leaves its zip members in sorted order."""

    @staticmethod
    def _member_names(path):
        """Return the member names of the zip at `path`, in archive order,
        with any leading `mimetype` entries dropped."""
        with zipfile.ZipFile(path) as zin:
            names = [item.filename for item in zin.infolist()]
        # Presumably `mimetype` is allowed to come first regardless of
        # sorting -- TODO confirm against the archive parser.
        while names and names[0] == 'mimetype':
            names.pop(0)
        return names

    def test_libreoffice(self):
        shutil.copy('./tests/data/dirty.odt', './tests/data/clean.odt')
        p = office.LibreOfficeParser('./tests/data/clean.odt')

        meta = p.get_meta()
        self.assertIsNotNone(meta)

        # The sample must start out unsorted, otherwise the check on the
        # cleaned file proves nothing. Names are compared pairwise: the
        # previous version never advanced its "previous name" and thus only
        # compared every entry against the first one.
        names = self._member_names('./tests/data/clean.odt')
        is_unordered = any(
            current < previous
            for previous, current in zip(names, names[1:])
        )
        self.assertTrue(is_unordered)

        ret = p.remove_all()
        self.assertTrue(ret)

        names = self._member_names('./tests/data/clean.cleaned.odt')
        for previous, current in zip(names, names[1:]):
            self.assertGreaterEqual(current, previous)

        os.remove('./tests/data/clean.odt')
        os.remove('./tests/data/clean.cleaned.odt')
|
||||
|
||||
class TestRsidRemoval(unittest.TestCase):
    """Check that `w:rsid` occurrences disappear from a cleaned docx."""

    def test_office(self):
        shutil.copy('./tests/data/office_revision_session_ids.docx', './tests/data/clean.docx')
        p = office.MSOfficeParser('./tests/data/clean.docx')

        meta = p.get_meta()
        self.assertIsNotNone(meta)

        # Total over all .xml members of the dirty file. The counter is an
        # int from the start (it used to be initialised with `False`).
        how_many_rsid = 0
        with zipfile.ZipFile('./tests/data/clean.docx') as zin:
            for item in zin.infolist():
                if not item.filename.endswith('.xml'):
                    continue
                num = zin.read(item).decode('utf-8').lower().count('w:rsid')
                how_many_rsid += num
        self.assertEqual(how_many_rsid, 11)

        ret = p.remove_all()
        self.assertTrue(ret)

        # After cleaning, no .xml member may mention it any more.
        with zipfile.ZipFile('./tests/data/clean.cleaned.docx') as zin:
            for item in zin.infolist():
                if not item.filename.endswith('.xml'):
                    continue
                num = zin.read(item).decode('utf-8').lower().count('w:rsid')
                self.assertEqual(num, 0)

        os.remove('./tests/data/clean.docx')
        os.remove('./tests/data/clean.cleaned.docx')
|
||||
|
||||
|
||||
class TestNsidRemoval(unittest.TestCase):
    """Check that `w:nsid` occurrences disappear from a cleaned docx."""

    def test_office(self):
        shutil.copy('./tests/data/dirty_with_nsid.docx', './tests/data/clean.docx')
        p = office.MSOfficeParser('./tests/data/clean.docx')

        meta = p.get_meta()
        self.assertIsNotNone(meta)

        # The counter is an int from the start (it used to be initialised
        # with `False`).
        # NOTE(review): this pre-clean sanity check counts `w:rsid`, not
        # `w:nsid`, although the test is about nsid. The needle and the
        # expected 1190 are kept as-is because the fixture is not at hand
        # to recompute an nsid count -- TODO confirm and switch.
        how_many_rsid = 0
        with zipfile.ZipFile('./tests/data/clean.docx') as zin:
            for item in zin.infolist():
                if not item.filename.endswith('.xml'):
                    continue
                num = zin.read(item).decode('utf-8').lower().count('w:rsid')
                how_many_rsid += num
        self.assertEqual(how_many_rsid, 1190)

        ret = p.remove_all()
        self.assertTrue(ret)

        # After cleaning, no .xml member may mention `w:nsid`.
        with zipfile.ZipFile('./tests/data/clean.cleaned.docx') as zin:
            for item in zin.infolist():
                if not item.filename.endswith('.xml'):
                    continue
                num = zin.read(item).decode('utf-8').lower().count('w:nsid')
                self.assertEqual(num, 0)

        os.remove('./tests/data/clean.docx')
        os.remove('./tests/data/clean.cleaned.docx')
|
80
tests/test_lightweight_cleaning.py
Normal file
@@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import unittest
|
||||
import shutil
|
||||
import os
|
||||
|
||||
from libmat2 import pdf, images, torrent
|
||||
|
||||
|
||||
class TestLightWeightCleaning(unittest.TestCase):
    """Run parsers with `lightweight_cleaning` switched on."""

    # One entry per sample `./tests/data/dirty.<name>`:
    #   parser        -- class instantiated on the file
    #   meta          -- key/value pairs that must be present before cleaning
    #   expected_meta -- exact metadata expected after cleaning
    data = [
        {
            'name': 'pdf',
            'parser': pdf.PDFParser,
            'meta': {'producer': 'pdfTeX-1.40.14'},
            'expected_meta': {'creation-date': -1, 'format': 'PDF-1.5', 'mod-date': -1},
        },
        {
            'name': 'png',
            'parser': images.PNGParser,
            'meta': {'Comment': 'This is a comment, be careful!'},
            'expected_meta': {},
        },
        {
            'name': 'jpg',
            'parser': images.JPGParser,
            'meta': {'Comment': 'Created with GIMP'},
            'expected_meta': {},
        },
        # Disabled case:
        # {
        #     'name': 'webp',
        #     'parser': images.WEBPParser,
        #     'meta': {'Warning': '[minor] Improper EXIF header'},
        #     'expected_meta': {},
        # },
        {
            'name': 'torrent',
            'parser': torrent.TorrentParser,
            'meta': {'created by': b'mktorrent 1.0'},
            'expected_meta': {},
        },
        {
            'name': 'tiff',
            'parser': images.TiffParser,
            'meta': {'ImageDescription': 'OLYMPUS DIGITAL CAMERA '},
            'expected_meta': {
                'ResolutionUnit': 'inches',
                'XResolution': 72,
                'YResolution': 72,
            },
        },
    ]

    def test_all(self):
        # Walk through every entry of `data`, in order.
        for case in self.data:
            extension = case['name']
            make_parser = case['parser']
            target = './tests/data/clean.' + extension
            shutil.copy('./tests/data/dirty.' + extension, target)
            p1 = make_parser(target)

            found = p1.get_meta()
            for key in case['meta']:
                self.assertEqual(found[key], case['meta'][key])

            p1.lightweight_cleaning = True
            self.assertTrue(p1.remove_all())

            p2 = make_parser(p1.output_filename)
            self.assertEqual(p2.get_meta(), case['expected_meta'])

            os.remove(target)
            os.remove(p1.output_filename)

    def test_exiftool_overwrite(self):
        # The output path already holds a dirty file before cleaning
        # starts; the result must be clean nonetheless.
        sample = './tests/data/dirty.png'
        target = './tests/data/clean.png'
        shutil.copy(sample, target)

        p1 = images.PNGParser(target)
        p1.lightweight_cleaning = True
        shutil.copy(sample, p1.output_filename)
        self.assertTrue(p1.remove_all())

        p2 = images.PNGParser(p1.output_filename)
        self.assertEqual(p2.get_meta(), {})

        os.remove(target)
        os.remove(p1.output_filename)
|
32
tests/test_policy.py
Normal file
@@ -0,0 +1,32 @@
|
||||
#!/usr/bin/env python3
|
||||
|
||||
import unittest
|
||||
import shutil
|
||||
import os
|
||||
|
||||
from libmat2 import office, UnknownMemberPolicy
|
||||
|
||||
class TestPolicy(unittest.TestCase):
    """Exercise the `unknown_member_policy` setting of `MSOfficeParser`."""

    # Working copy of the sample document, recreated by every test.
    target = './tests/data/clean.docx'

    def test_policy_omit(self):
        shutil.copy('./tests/data/embedded.docx', self.target)
        p = office.MSOfficeParser(self.target)
        p.unknown_member_policy = UnknownMemberPolicy.OMIT
        self.assertTrue(p.remove_all())
        os.remove(p.filename)
        # Also delete the cleaned output, as test_policy_keep does; it used
        # to be left behind in the data directory.
        os.remove(p.output_filename)

    def test_policy_keep(self):
        shutil.copy('./tests/data/embedded.docx', self.target)
        p = office.MSOfficeParser(self.target)
        p.unknown_member_policy = UnknownMemberPolicy.KEEP
        self.assertTrue(p.remove_all())
        os.remove(p.filename)
        os.remove(p.output_filename)

    def test_policy_unknown(self):
        # Building a policy from an unknown name must raise ValueError.
        shutil.copy('./tests/data/embedded.docx', self.target)
        p = office.MSOfficeParser(self.target)
        with self.assertRaises(ValueError):
            p.unknown_member_policy = UnknownMemberPolicy('unknown_policy_name_totally_invalid')
        os.remove(p.filename)
|