mirror of
https://github.com/MarginaliaSearch/MarginaliaSearch.git
synced 2025-10-06 07:32:38 +02:00
Compare commits
2379 Commits
old-master
...
deploy-016
Author | SHA1 | Date | |
---|---|---|---|
|
9c8e9a68d5 | ||
|
413d5cc788 | ||
|
58539b92ac | ||
|
fe72f16df1 | ||
|
b49a244a2e | ||
|
3f0b4c010f | ||
|
c6e0cd93f7 | ||
|
80a7ccb080 | ||
|
54dec347c4 | ||
|
d6ee3f0785 | ||
|
8be88afcf3 | ||
|
0e3c00d3e1 | ||
|
4279a7f1aa | ||
|
251006d4f9 | ||
|
c3e99dc12a | ||
|
aaaa2de022 | ||
|
fc1388422a | ||
|
b07080db16 | ||
|
e9d86dca4a | ||
|
1d693f0efa | ||
|
5874a163dc | ||
|
5ec7a1deab | ||
|
7fea2808ed | ||
|
8da74484f0 | ||
|
923d5a7234 | ||
|
58f88749b8 | ||
|
77f727a5ba | ||
|
667cfb53dc | ||
|
fe36d4ed20 | ||
|
acf4bef98d | ||
|
2a737c34bb | ||
|
90a577af82 | ||
|
f0c9b935d8 | ||
|
7b5493dd51 | ||
|
c246a59158 | ||
|
0b99781d24 | ||
|
39db9620c1 | ||
|
1781599363 | ||
|
6b2d18fb9b | ||
|
59b1d200ab | ||
|
897010a2cf | ||
|
602af7a77e | ||
|
a7d91c8527 | ||
|
7151602124 | ||
|
884e33bd4a | ||
|
e84d5c497a | ||
|
2d2d3e2466 | ||
|
647dd9b12f | ||
|
de4e2849ce | ||
|
3c43f1954e | ||
|
fa2462ec39 | ||
|
f4ad7145db | ||
|
068b450180 | ||
|
05b909a21f | ||
|
3d179cddce | ||
|
1a2aae496a | ||
|
353cdffb3f | ||
|
2e3f1313c7 | ||
|
58e6f141ce | ||
|
500f63e921 | ||
|
6dfbedda1e | ||
|
9715ddb105 | ||
|
1fc6313a77 | ||
|
b1249d5b8a | ||
|
ef95d59b07 | ||
|
acdd8664f5 | ||
|
6b12eac58a | ||
|
bb3f1f395a | ||
|
b661beef41 | ||
|
9888c47f19 | ||
|
dcef7e955b | ||
|
b3973a1dd7 | ||
|
8bd05d6d90 | ||
|
59df8e356e | ||
|
7161162a35 | ||
|
d7c4c5141f | ||
|
88e9b8fb05 | ||
|
b6265cee11 | ||
|
c91af247e9 | ||
|
7a31227de1 | ||
|
4f477604c5 | ||
|
2970f4395b | ||
|
d1ec909b36 | ||
|
c67c5bbf42 | ||
|
ecb0e57a1a | ||
|
8c61f61b46 | ||
|
662a18c933 | ||
|
1c2426a052 | ||
|
34df7441ac | ||
|
5387e2bd80 | ||
|
0f3b24d0f8 | ||
|
a732095d2a | ||
|
6607f0112f | ||
|
4913730de9 | ||
|
1db64f9d56 | ||
|
4dcff14498 | ||
|
426658f64e | ||
|
2181b22f05 | ||
|
42bd79a609 | ||
|
b91c1e528a | ||
|
b1130d7a04 | ||
|
8364bcdc97 | ||
|
626cab5fab | ||
|
cfd4712191 | ||
|
9f18ced73d | ||
|
18e91269ab | ||
|
e315ca5758 | ||
|
3ceea17c1d | ||
|
b34527c1a3 | ||
|
185bf28fca | ||
|
78cc25584a | ||
|
62ba30bacf | ||
|
3bb84eb206 | ||
|
be7d13ccce | ||
|
8c088a7c0b | ||
|
ea9a642b9b | ||
|
27f528af6a | ||
|
20ca41ec95 | ||
|
7671f0d9e4 | ||
|
44d6bc71b7 | ||
|
9d302e2973 | ||
|
f553701224 | ||
|
f076d05595 | ||
|
b513809710 | ||
|
7519b28e21 | ||
|
3eac4dd57f | ||
|
4c2810720a | ||
|
8480ba8daa | ||
|
fbba392491 | ||
|
530eb35949 | ||
|
c2dd2175a2 | ||
|
b8581b0f56 | ||
|
2ea34767d8 | ||
|
e9af838231 | ||
|
ae0cad47c4 | ||
|
5fbc8ef998 | ||
|
32c6dd9e6a | ||
|
6ece6a6cfb | ||
|
39cd1c18f8 | ||
|
eb65daaa88 | ||
|
0bebdb6e33 | ||
|
1e50e392c6 | ||
|
fb673de370 | ||
|
eee73ab16c | ||
|
5354e034bf | ||
|
72384ad6ca | ||
|
a2b076f9be | ||
|
c8b0a32c0f | ||
|
f0d74aa3bb | ||
|
74a1f100f4 | ||
|
eb049658e4 | ||
|
db138b2a6f | ||
|
1673fc284c | ||
|
503ea57d5b | ||
|
18ca926c7f | ||
|
db99242db2 | ||
|
2b9d2985ba | ||
|
eeb6ecd711 | ||
|
1f58aeadbf | ||
|
3d68be64da | ||
|
668f3b16ef | ||
|
98a340a0d1 | ||
|
8862100f7e | ||
|
274941f6de | ||
|
abec83582d | ||
|
569520c9b6 | ||
|
088310e998 | ||
|
270cab874b | ||
|
4c74e280d3 | ||
|
5b347e17ac | ||
|
55d6ab933f | ||
|
43b74e9706 | ||
|
579a115243 | ||
|
2c67f50a43 | ||
|
78a958e2b0 | ||
|
4e939389b2 | ||
|
e67a9bdb91 | ||
|
567e4e1237 | ||
|
4342e42722 | ||
|
bc818056e6 | ||
|
de2feac238 | ||
|
1e770205a5 | ||
|
e44ecd6d69 | ||
|
5b93a0e633 | ||
|
08fb0e5efe | ||
|
bcf67782ea | ||
|
ef3f175ede | ||
|
bbe4b5d9fd | ||
|
c67a635103 | ||
|
20b24133fb | ||
|
f2567677e8 | ||
|
bc2c2061f2 | ||
|
1c7f5a31a5 | ||
|
59a8ea60f7 | ||
|
aa9b1244ea | ||
|
2d17233366 | ||
|
b245cc9f38 | ||
|
6614d05bdf | ||
|
55aeb03c4a | ||
|
faa589962f | ||
|
c7edd6b39f | ||
|
79da622e3b | ||
|
3da8337ba6 | ||
|
a32d230f0a | ||
|
3772bfd387 | ||
|
02a7900d1a | ||
|
a1fb92468f | ||
|
b7f0a2a98e | ||
|
5fb76b2e79 | ||
|
ad8c97f342 | ||
|
dc1b6373eb | ||
|
983d6d067c | ||
|
a84a06975c | ||
|
d2864c13ec | ||
|
03ba53ce51 | ||
|
d4a6684931 | ||
|
6f0485287a | ||
|
59e2dd4c26 | ||
|
ca1807caae | ||
|
26c20e18ac | ||
|
7c90b6b414 | ||
|
b63c54c4ce | ||
|
fecd2f4ec3 | ||
|
39e420de88 | ||
|
dc83619861 | ||
|
87d1c89701 | ||
|
a42a7769e2 | ||
|
202bda884f | ||
|
2315fdc731 | ||
|
b5469bd8a1 | ||
|
6a6318d04c | ||
|
55933f8d40 | ||
|
be6382e0d0 | ||
|
45e771f96b | ||
|
8dde502cc9 | ||
|
3e66767af3 | ||
|
9ec9d1b338 | ||
|
dcad0d7863 | ||
|
94e1aa0baf | ||
|
b62f043910 | ||
|
6ea22d0d21 | ||
|
8c69dc31b8 | ||
|
00734ea87f | ||
|
3009713db4 | ||
|
9b2ceaf37c | ||
|
8019c2ce18 | ||
|
a9e312b8b1 | ||
|
4da3563d8a | ||
|
48d0a3089a | ||
|
594df64b20 | ||
|
06efb5abfc | ||
|
78eb1417a7 | ||
|
8c8f2ad5ee | ||
|
f71e79d10f | ||
|
1b27c5cf06 | ||
|
67edc8f90d | ||
|
5f576b7d0c | ||
|
8b05c788fd | ||
|
236f033bc9 | ||
|
510fc75121 | ||
|
0376f2e6e3 | ||
|
0b65164f60 | ||
|
9be477de33 | ||
|
84f55b84ff | ||
|
ab5c30ad51 | ||
|
0c839453c5 | ||
|
5e4c5d03ae | ||
|
710af4999a | ||
|
a5b0a1ae62 | ||
|
e9f71ee39b | ||
|
baeb4a46cd | ||
|
5e2a8e9f27 | ||
|
cc1a5bdf90 | ||
|
7f7b1ffaba | ||
|
0ea8092350 | ||
|
483d29497e | ||
|
bae44497fe | ||
|
0d59202aca | ||
|
0ca43f0c9c | ||
|
3bc99639a0 | ||
|
927bc0b63c | ||
|
d968801dc1 | ||
|
89db69d360 | ||
|
895cee7004 | ||
|
4bb71b8439 | ||
|
e4a41f7dd1 | ||
|
69ad6287b1 | ||
|
81cdd6385d | ||
|
e76c42329f | ||
|
e6ef4734ea | ||
|
41a59dcf45 | ||
|
df4bc1d7e9 | ||
|
2b222efa75 | ||
|
94d4d2edb7 | ||
|
7ae19a92ba | ||
|
56d14e56d7 | ||
|
a557c7ae7f | ||
|
b66879ccb1 | ||
|
f1b7157ca2 | ||
|
7622335e84 | ||
|
0da2047eae | ||
|
5ee4321110 | ||
|
9459b9933b | ||
|
87fb564f89 | ||
|
5ca8523220 | ||
|
1118657ffd | ||
|
b1f970152d | ||
|
e1783891ab | ||
|
64d32471dd | ||
|
232cc465d9 | ||
|
8c963bd4ba | ||
|
6a079c1c75 | ||
|
2dc9f2e639 | ||
|
b66fb9caf6 | ||
|
6d18e6d840 | ||
|
2a3c63f209 | ||
|
9f70cecaef | ||
|
47e58a21c6 | ||
|
3714104976 | ||
|
f6f036b9b1 | ||
|
b510b7feb8 | ||
|
c08203e2ed | ||
|
86497fd32f | ||
|
3b998573fd | ||
|
e161882ec7 | ||
|
357f349e30 | ||
|
e4769f541d | ||
|
2a173e2861 | ||
|
a6a900266c | ||
|
bdba53f055 | ||
|
eb2fe18867 | ||
|
a7468c8d23 | ||
|
fb2beb1eac | ||
|
0fb03e3d62 | ||
|
67db3f295e | ||
|
dafaab3ef7 | ||
|
3f11ca409f | ||
|
694eed79ef | ||
|
4220169119 | ||
|
bbdde789e7 | ||
|
0a53ac68a0 | ||
|
eab61cd48a | ||
|
e65d75a0f9 | ||
|
3b99cffb3d | ||
|
a97c05107e | ||
|
5002870d1f | ||
|
73861e613f | ||
|
0ce2ba9ad9 | ||
|
3ddcebaa36 | ||
|
b91463383e | ||
|
7444a2f36c | ||
|
461bc3eb1a | ||
|
cf7f84f033 | ||
|
fdee07048d | ||
|
2fbf201761 | ||
|
4018e4c434 | ||
|
f3382b5bd8 | ||
|
9fc82574f0 | ||
|
589f4dafb9 | ||
|
c5d657ef98 | ||
|
3c2bb566da | ||
|
9287ee0141 | ||
|
2769c8f869 | ||
|
ddb66f33ba | ||
|
79500b8fbc | ||
|
187eea43a4 | ||
|
a89ed6fa9f | ||
|
e0c0ed27bc | ||
|
20abb91657 | ||
|
291ca8daf1 | ||
|
8d168be138 | ||
|
6e1aa7b391 | ||
|
deab9b9516 | ||
|
39d99a906a | ||
|
6f72e6e0d3 | ||
|
d786d79483 | ||
|
01510f6c2e | ||
|
7ba43e9e3f | ||
|
97bfcd1353 | ||
|
aa3c85c196 | ||
|
ee2d5496d0 | ||
|
5c858a2b94 | ||
|
fb75a3827d | ||
|
7d546d0e2a | ||
|
8fcb6ffd7a | ||
|
f97de0c15a | ||
|
be9e192b78 | ||
|
75ae1c9526 | ||
|
33761a0236 | ||
|
19b69b1764 | ||
|
8b804359a9 | ||
|
f050bf5c4c | ||
|
fdc3efa250 | ||
|
5fdd2c71f8 | ||
|
c97c66a41c | ||
|
7b64377fd6 | ||
|
e11ebf18e5 | ||
|
ba47d72bf4 | ||
|
52bc0272f8 | ||
|
d4bce13a03 | ||
|
b9842b57e0 | ||
|
95776e9bee | ||
|
077d8dcd11 | ||
|
9ec41e27c6 | ||
|
200743c84f | ||
|
6d7998e349 | ||
|
7d1ef08a0f | ||
|
ea6b148df2 | ||
|
3ec9c4c5fa | ||
|
0b6b5dab07 | ||
|
ff17473105 | ||
|
dc5f97e737 | ||
|
d919179ba3 | ||
|
f09669a5b0 | ||
|
b3b0f6fed3 | ||
|
88caca60f9 | ||
|
923ebbac81 | ||
|
df298df852 | ||
|
552b246099 | ||
|
80e6d0069c | ||
|
b941604135 | ||
|
52eb5bc84f | ||
|
4d23fe6261 | ||
|
14519294d2 | ||
|
51e46ad2b0 | ||
|
665c8831a3 | ||
|
47dfbacb00 | ||
|
f94911541a | ||
|
89d8af640d | ||
|
6e4252cf4c | ||
|
79ce4de2ab | ||
|
d6575dfee4 | ||
|
a91ab4c203 | ||
|
6a3079a167 | ||
|
c728a1e2f2 | ||
|
d874d76a09 | ||
|
70bc8831f5 | ||
|
41c11be075 | ||
|
163ce19846 | ||
|
9eb16cb667 | ||
|
af40fa327b | ||
|
cf6d28e71e | ||
|
3791ea1e18 | ||
|
34258b92d1 | ||
|
e5db3f11e1 | ||
|
9f47ce8d15 | ||
|
a5b4951f23 | ||
|
8b8bf0748f | ||
|
5cc71ae586 | ||
|
33fcfe4b63 | ||
|
a31a3b53c4 | ||
|
a456ec9599 | ||
|
a2bc9a98c0 | ||
|
e24a98390c | ||
|
6f858cd627 | ||
|
a293266ccd | ||
|
b8e0dc93d7 | ||
|
d774c39031 | ||
|
ab17af99da | ||
|
b0ac3c586f | ||
|
139fa85b18 | ||
|
bfeb9a4538 | ||
|
3d6c79ae5f | ||
|
c9e9f73ea9 | ||
|
80e482b155 | ||
|
9351593495 | ||
|
d74436f546 | ||
|
76e9053dd0 | ||
|
dbb8bcdd8e | ||
|
7305afa0f8 | ||
|
481f999b70 | ||
|
4b16022556 | ||
|
89dd201a7b | ||
|
ab486323f2 | ||
|
6460c11107 | ||
|
89f7f3c17c | ||
|
fe800b3af7 | ||
|
2a1077ff43 | ||
|
01a16ff388 | ||
|
eb60ddb729 | ||
|
db5faeceee | ||
|
45d3e6aa71 | ||
|
d84a2c183f | ||
|
ecb5eedeae | ||
|
90a2d4ae38 | ||
|
2b8ab97ec1 | ||
|
43ca9c8a12 | ||
|
69d99c91dd | ||
|
a8cc98a0f6 | ||
|
2ee58f4bc9 | ||
|
938431e514 | ||
|
b2de3c70fa | ||
|
542690d9f6 | ||
|
596a7fb4ea | ||
|
c3f726a01f | ||
|
4538ade156 | ||
|
f4709d8f32 | ||
|
3dda8c228c | ||
|
ccf6b7caf3 | ||
|
fed33ed64a | ||
|
ca27d95ce1 | ||
|
3566fe296a | ||
|
c91435e314 | ||
|
31f30069a4 | ||
|
e5726a75d2 | ||
|
c757d116bf | ||
|
23cce0c78a | ||
|
1bd29a586c | ||
|
4565bfe359 | ||
|
336d6fdd14 | ||
|
95cde242ca | ||
|
9224176202 | ||
|
0d2390fd13 | ||
|
4a0356e26f | ||
|
73f973cc06 | ||
|
e9e8580913 | ||
|
8b85a58fea | ||
|
40512511af | ||
|
10d8fc4fe7 | ||
|
9899d45ea8 | ||
|
3eea471ca6 | ||
|
3dec4b6b34 | ||
|
162fc25ebc | ||
|
e9854f194c | ||
|
9c292a4f62 | ||
|
edb42836da | ||
|
1ff88ff0bc | ||
|
28e7c8e5e0 | ||
|
463b3ed0ce | ||
|
8e78286068 | ||
|
f4eeef145e | ||
|
87aa869338 | ||
|
60ad4786bc | ||
|
a74df7f905 | ||
|
9f9c6736ab | ||
|
b95646625f | ||
|
6e47eae903 | ||
|
934af0dd4b | ||
|
a8bec13ed9 | ||
|
1cf62f5850 | ||
|
8047e77757 | ||
|
2a92de29ce | ||
|
99523ca079 | ||
|
35f49bbb60 | ||
|
50ec922c2b | ||
|
cfbbeaa26e | ||
|
a3b0189934 | ||
|
8f367d96f8 | ||
|
f78ef36cd4 | ||
|
dc67c81f99 | ||
|
50ba8fd099 | ||
|
99b3b00b68 | ||
|
f6d981761d | ||
|
8290c19e24 | ||
|
7a69dff6cf | ||
|
bfb7ed2c99 | ||
|
e19dc9b13e | ||
|
74148c790e | ||
|
3d77456110 | ||
|
ab6a4b1749 | ||
|
aeeb1d0cb7 | ||
|
185b79f2a5 | ||
|
8d0f9652c7 | ||
|
5353805cc6 | ||
|
5407da5650 | ||
|
b1bfe6f76e | ||
|
74e25370ca | ||
|
bb5d946c26 | ||
|
abab5bdc8a | ||
|
30bf845c81 | ||
|
77efce0673 | ||
|
67a98fb0b0 | ||
|
7d471ec30d | ||
|
f3182a9264 | ||
|
805cb5ad58 | ||
|
fdf05cedae | ||
|
9c5f463775 | ||
|
893fae6d59 | ||
|
5660f291af | ||
|
efd56efc63 | ||
|
d94373f4b1 | ||
|
0d01a48260 | ||
|
00ab2684fa | ||
|
a5585110a6 | ||
|
965c89798e | ||
|
982b03382b | ||
|
24b805472a | ||
|
6ce029b317 | ||
|
63e5b0ab18 | ||
|
6dda2c2d83 | ||
|
3fb3c0b92e | ||
|
aa2c960b74 | ||
|
4fbcc02f96 | ||
|
9aa8f13731 | ||
|
65bee366dc | ||
|
53700e6667 | ||
|
7f498e10b7 | ||
|
6eb0f13411 | ||
|
773377fe84 | ||
|
4372c8c835 | ||
|
099133bdbc | ||
|
b09e2dbeb7 | ||
|
96bcf03ad5 | ||
|
0999f07320 | ||
|
5d2b455572 | ||
|
ea75ddc0e0 | ||
|
2db0e446cb | ||
|
557bdaa694 | ||
|
9eb1f120fc | ||
|
266d6e4bea | ||
|
e4c97a91d8 | ||
|
b0a874a842 | ||
|
bca40de107 | ||
|
93652e0937 | ||
|
0a383a712d | ||
|
03d5dec24c | ||
|
b2a3cac351 | ||
|
a18edad04c | ||
|
92522e8d97 | ||
|
049d94ce31 | ||
|
dbc6a95276 | ||
|
75b0888032 | ||
|
2ad93ad41a | ||
|
623ee5570f | ||
|
fd2bad39f3 | ||
|
e6c8a6febe | ||
|
4ece5f847b | ||
|
e4f04af044 | ||
|
b730b17f52 | ||
|
98c40958ab | ||
|
41b52f5bcd | ||
|
4264fb9f49 | ||
|
016a4c62e1 | ||
|
2f38c95886 | ||
|
df89661ed2 | ||
|
41da4f422d | ||
|
2e89b55593 | ||
|
7babdb87d5 | ||
|
680ad19c7d | ||
|
f01267bc6b | ||
|
df6a05b9a7 | ||
|
8569bb8e11 | ||
|
ca6e2db2b9 | ||
|
2080e31616 | ||
|
c379be846c | ||
|
9bc665628b | ||
|
ee49c01d86 | ||
|
b21f8538a8 | ||
|
dd15676d33 | ||
|
ec5a17ad13 | ||
|
e48f52faba | ||
|
8462e88b8f | ||
|
bf26ead010 | ||
|
c2cedfa83c | ||
|
eba2844361 | ||
|
c6c8b059bf | ||
|
d8a99784e5 | ||
|
57929ff242 | ||
|
4430a39120 | ||
|
6228f46af1 | ||
|
ac67b6b5da | ||
|
1a268c24c8 | ||
|
38e2089c3f | ||
|
e2107901ec | ||
|
15745b692e | ||
|
696fd8909d | ||
|
02b1c4b172 | ||
|
285e657f68 | ||
|
046ffc7752 | ||
|
2ef66ce0ca | ||
|
dc5c668940 | ||
|
f19148132a | ||
|
6d7b886aaa | ||
|
b316b55be9 | ||
|
80900107f7 | ||
|
7e4efa45b8 | ||
|
86ea28d6bc | ||
|
34703da144 | ||
|
1282f78bc5 | ||
|
2d5d965f7f | ||
|
afe56c7cf1 | ||
|
7d51cf882f | ||
|
499deac2ef | ||
|
9685993adb | ||
|
261dcdadc8 | ||
|
314a901bf0 | ||
|
1caad7e19e | ||
|
e585116dab | ||
|
40f42bf654 | ||
|
eaf7fbb9e9 | ||
|
d05a2e57e9 | ||
|
f8684118f3 | ||
|
2e1f669aea | ||
|
6c3abff664 | ||
|
dcb43a3308 | ||
|
ec600b967d | ||
|
aebb2652e8 | ||
|
52a9a0d410 | ||
|
4123e99469 | ||
|
51a8a242ac | ||
|
60ef826e07 | ||
|
2ad564404e | ||
|
2bb9f18411 | ||
|
7a1edc0880 | ||
|
b812e96c6d | ||
|
22b35d5d91 | ||
|
d36055a2d0 | ||
|
0d227f3543 | ||
|
accc598967 | ||
|
02c4a2d4ba | ||
|
6665e447aa | ||
|
7eb955cc42 | ||
|
f4d79c203d | ||
|
4d29581ea4 | ||
|
0b31c4cfbb | ||
|
5c098005cc | ||
|
ae87e41cec | ||
|
dfd19b5eb9 | ||
|
8ed5b51a32 | ||
|
9d0e5dee02 | ||
|
ffd970036d | ||
|
fa162698c2 | ||
|
ad3857938d | ||
|
179a6002c2 | ||
|
d28fc86956 | ||
|
6303977e9c | ||
|
97695693f2 | ||
|
1ab875a75d | ||
|
31881874a9 | ||
|
f090f0101b | ||
|
9881cac2da | ||
|
12590d3449 | ||
|
abf7a8d78d | ||
|
ecfe17521a | ||
|
0d29e2a39d | ||
|
12a2ab93db | ||
|
d90bd340bb | ||
|
21afe94096 | ||
|
fa36689597 | ||
|
85c99ae808 | ||
|
a4ecd5f4ce | ||
|
6401a513d7 | ||
|
d86926be5f | ||
|
a6b03a66dc | ||
|
d023e399d2 | ||
|
e8ab1e14e0 | ||
|
a6e15cb338 | ||
|
4fbb863a10 | ||
|
6ee4d1eb90 | ||
|
738e0e5fed | ||
|
0e4dd3d76d | ||
|
10fe5a78cb | ||
|
975b8ae2e9 | ||
|
935234939c | ||
|
87e38e6181 | ||
|
f73fc8dd57 | ||
|
3faa5bf521 | ||
|
6973712480 | ||
|
02df421c94 | ||
|
95b9af92a0 | ||
|
8ee64c0771 | ||
|
b805f6daa8 | ||
|
dae22ccbe0 | ||
|
9d00243d7f | ||
|
5461634616 | ||
|
40bca93884 | ||
|
b798f28443 | ||
|
fff2ce5721 | ||
|
69f88255e9 | ||
|
08ff79827e | ||
|
67703e2274 | ||
|
d0d6bb173c | ||
|
54caf17107 | ||
|
2168b7cf7d | ||
|
90744433c9 | ||
|
5371f078f7 | ||
|
0dd14a4bd0 | ||
|
9974b31a09 | ||
|
0ffbbaf4b9 | ||
|
6839415a0b | ||
|
55f3ac4846 | ||
|
801cf4b5da | ||
|
e0459d0c0d | ||
|
23759a7243 | ||
|
55b2b7636b | ||
|
36160988e2 | ||
|
9f982a0c3d | ||
|
dcbec9414f | ||
|
a07cf1ba93 | ||
|
4a8afa6b9f | ||
|
bb06cc9ff3 | ||
|
9c06f446fb | ||
|
2d076cbd67 | ||
|
fb2eef24d6 | ||
|
e2f68d9ccf | ||
|
d4f4d751c0 | ||
|
b4eac2516e | ||
|
4435f6245c | ||
|
9b922af075 | ||
|
0112ae725c | ||
|
619392edf9 | ||
|
0894822b68 | ||
|
206a7ce6c1 | ||
|
a69ab311c7 | ||
|
a61327fa0b | ||
|
6985ab762a | ||
|
0e8300979b | ||
|
0b60411e5f | ||
|
f83f777fff | ||
|
89aae93e60 | ||
|
65b74f9cab | ||
|
7543e98035 | ||
|
59ec70eb73 | ||
|
365229991b | ||
|
959a8e29ee | ||
|
197c82acd4 | ||
|
9539fdb53c | ||
|
5659df4388 | ||
|
24bf29d369 | ||
|
17dc00d05f | ||
|
4fcd4a8197 | ||
|
daf2a8df54 | ||
|
43489c98d8 | ||
|
88997a1c4f | ||
|
d12c77305c | ||
|
ab4e2b222e | ||
|
b867eadbef | ||
|
19163fa883 | ||
|
a7c33809c4 | ||
|
650f3843bb | ||
|
9e766bc056 | ||
|
48aff52e00 | ||
|
9d7616317e | ||
|
d227a09fb1 | ||
|
f48cf77c4d | ||
|
3549be216f | ||
|
c3e3a3dbc5 | ||
|
55a7c1db00 | ||
|
bb315221ab | ||
|
c38766c5a6 | ||
|
c837321df1 | ||
|
af7f6b89ec | ||
|
29a4d3df23 | ||
|
bcbb9afac0 | ||
|
7d1cafc070 | ||
|
5951c67a8b | ||
|
c454007730 | ||
|
4e49cca43d | ||
|
49a8c06095 | ||
|
d01d9fa670 | ||
|
a53a32f006 | ||
|
3548d54cf6 | ||
|
01f242ac7e | ||
|
2840d9d403 | ||
|
9fecfc5025 | ||
|
1b901e01f2 | ||
|
974aa35558 | ||
|
4021a0ae98 | ||
|
b7a95be731 | ||
|
616649f040 | ||
|
ac3c692b5f | ||
|
6087f9635c | ||
|
2ad0bfda1e | ||
|
cf8b12bcdc | ||
|
08f8b6e022 | ||
|
800ed6b1e9 | ||
|
df93e57a9a | ||
|
908535a3a0 | ||
|
7fe2ab6f39 | ||
|
c9ee0c909e | ||
|
38aedb50ac | ||
|
4772e0b59d | ||
|
9c49e876d5 | ||
|
152007cd5c | ||
|
70e2e41955 | ||
|
4d71c776fc | ||
|
0f41105436 | ||
|
2d49071e96 | ||
|
89889ecbbd | ||
|
41576e74d4 | ||
|
c8ee354d0b | ||
|
4e5f069809 | ||
|
6690e9bde8 | ||
|
e4b34b6ee6 | ||
|
3952ef6ca5 | ||
|
463d333846 | ||
|
7eb5e6aa66 | ||
|
282022d64e | ||
|
91a98a8807 | ||
|
32fe864a33 | ||
|
e1c9313396 | ||
|
f430a084e8 | ||
|
a86b596897 | ||
|
6dd87b0378 | ||
|
c9f029c214 | ||
|
6b88db10ad | ||
|
8a891c2159 | ||
|
ad2ac8eee3 | ||
|
f46733a47a | ||
|
934167323d | ||
|
64baa41e64 | ||
|
5165cf6d15 | ||
|
4489b21528 | ||
|
f623b37577 | ||
|
f4a2fea451 | ||
|
a748fc5448 | ||
|
0dcca0cb83 | ||
|
b80a83339b | ||
|
eb74d08f2a | ||
|
e79ab0c70e | ||
|
e419e26f3a | ||
|
6102fd99bf | ||
|
def36719d3 | ||
|
462aa9af26 | ||
|
a09c84e1b8 | ||
|
44b33798f3 | ||
|
2f0b648fad | ||
|
de0e56f027 | ||
|
973ced7b13 | ||
|
cb4b824a85 | ||
|
c583a538b1 | ||
|
e0224085b4 | ||
|
44c1e1d6d9 | ||
|
c620e9c026 | ||
|
1bb88968c5 | ||
|
df75e8f4aa | ||
|
adf846bfd2 | ||
|
1748fcc5ac | ||
|
08416393e0 | ||
|
fce26015c9 | ||
|
155be1078d | ||
|
6efc0f21fe | ||
|
f3255e080d | ||
|
0da03d4cfc | ||
|
5f6a3ef9d0 | ||
|
afc4fed591 | ||
|
cb505f98ef | ||
|
a0b3634cb6 | ||
|
e23359bae9 | ||
|
5531ed632a | ||
|
150ee21f3c | ||
|
c96da0ce1e | ||
|
a0d9e66ff7 | ||
|
55f627ed4c | ||
|
7dd8c78c6b | ||
|
8bf7d090fd | ||
|
6bfe04b609 | ||
|
491d6bec46 | ||
|
4fb86ac692 | ||
|
6cba6aef3b | ||
|
7e216db463 | ||
|
adc90c8f1e | ||
|
e3316a3672 | ||
|
a3a6d6292b | ||
|
8cb9455c32 | ||
|
dc65b2ee01 | ||
|
98a1adbf81 | ||
|
0bd1e15cce | ||
|
eda926767e | ||
|
cd1a18c045 | ||
|
6f567fbea8 | ||
|
0ebadd03a5 | ||
|
2253b556b2 | ||
|
6a7a7009c7 | ||
|
3c75057dcd | ||
|
212d101727 | ||
|
760b80659d | ||
|
04879c005d | ||
|
cb82927756 | ||
|
8b9629f2f6 | ||
|
f6db16b313 | ||
|
4668b1ddcb | ||
|
dcf9d9caad | ||
|
7a69b76001 | ||
|
ac07ef822f | ||
|
e7d4bcd872 | ||
|
a28c6d7cfe | ||
|
d816f048f5 | ||
|
b09ddd0036 | ||
|
0a73b02a00 | ||
|
8769704462 | ||
|
214551f1df | ||
|
2cc74c005a | ||
|
ed250f57f2 | ||
|
e92c25f7e0 | ||
|
3ab563f314 | ||
|
426338cb45 | ||
|
5fa2375898 | ||
|
41782a0ab5 | ||
|
9b06433b82 | ||
|
def607d840 | ||
|
2b811fb422 | ||
|
36cc62c10c | ||
|
975d92912c | ||
|
8bbaf457de | ||
|
7641a02f31 | ||
|
ce16239e34 | ||
|
d64bd227cf | ||
|
c5ab0a9054 | ||
|
dac948973d | ||
|
9d008d1d6f | ||
|
f52457213e | ||
|
579295a673 | ||
|
af8ff8ce99 | ||
|
7fa3e86e64 | ||
|
3359f72239 | ||
|
41fa154aa6 | ||
|
deaba0152d | ||
|
feaef6093e | ||
|
078fa4fdd0 | ||
|
2dc77a0638 | ||
|
cfd9a7187f | ||
|
f434a8b492 | ||
|
d2658d6f84 | ||
|
8c559c8121 | ||
|
2353c73c57 | ||
|
599e719ad4 | ||
|
b6d365bacd | ||
|
52f0c0d336 | ||
|
be55f3f937 | ||
|
fda1c05164 | ||
|
1329d4abd8 | ||
|
f064992137 | ||
|
8a81a480a1 | ||
|
d729c400e5 | ||
|
ad4810d991 | ||
|
6a67043537 | ||
|
864d6c28e7 | ||
|
bb6b51ad91 | ||
|
65e3caf402 | ||
|
b7d9a7ae89 | ||
|
ed73d79ec1 | ||
|
c538c25008 | ||
|
4b47fadbab | ||
|
fcdc843c15 | ||
|
dbdcf459a7 | ||
|
ef25d60666 | ||
|
7f7021ce64 | ||
|
448a941de2 | ||
|
5766da69ec | ||
|
617e633d7a | ||
|
b770a1143f | ||
|
e1151ecf2a | ||
|
ae7c760772 | ||
|
81815f3e0a | ||
|
3890c413a3 | ||
|
8e02f567d7 | ||
|
87bb93e1d4 | ||
|
e596c929ac | ||
|
9852b0e609 | ||
|
51b0d6c0d3 | ||
|
15391c7a88 | ||
|
fe62593286 | ||
|
4cc11e183c | ||
|
de8e753fc8 | ||
|
f82ebd7716 | ||
|
bd0704d5a4 | ||
|
1968485881 | ||
|
002afca1c5 | ||
|
411b3f3138 | ||
|
a4b810f511 | ||
|
cd8f33f830 | ||
|
824765b1ee | ||
|
9e8138f853 | ||
|
fe8d583fdd | ||
|
0bd3365c24 | ||
|
d8f4e7d72b | ||
|
afc047cd27 | ||
|
00ef4f9803 | ||
|
07e4d7ec6d | ||
|
258a344810 | ||
|
2a03014652 | ||
|
8ae1f08095 | ||
|
57e6a12d08 | ||
|
46423612e3 | ||
|
29bf473d74 | ||
|
9689f3faee | ||
|
93fa58c93d | ||
|
186a98cc99 | ||
|
9993f265ca | ||
|
144f967dbf | ||
|
b31c9bb726 | ||
|
c0820b5e5c | ||
|
65b8a1d5d9 | ||
|
a0648844fb | ||
|
c4a27003c6 | ||
|
41abd8982f | ||
|
86bbc1043e | ||
|
9a045a0588 | ||
|
9415539b38 | ||
|
84bab2783d | ||
|
0d6e7673e4 | ||
|
d78e9e715f | ||
|
a8ec59eb75 | ||
|
20fc0ef13c | ||
|
37ae8cb33c | ||
|
9f1649636e | ||
|
3a65fe8917 | ||
|
99a6e56e99 | ||
|
e696fd9e92 | ||
|
c943954bb4 | ||
|
eaf836dc66 | ||
|
dbf64b0987 | ||
|
8d0af9548b | ||
|
67aa20ea2c | ||
|
5604e9f531 | ||
|
1a51ec2d69 | ||
|
3eb0800742 | ||
|
427f3e922f | ||
|
823ca73a3f | ||
|
7fc0d4d786 | ||
|
b8e336e809 | ||
|
9429bf5c45 | ||
|
f7f0100174 | ||
|
fc00701a1e | ||
|
09447f2ad2 | ||
|
ff0ef1eebc | ||
|
1d34224416 | ||
|
56d35aa596 | ||
|
2201b1a506 | ||
|
5cdb07023b | ||
|
6154e16951 | ||
|
f4ff7185f0 | ||
|
6357d30ea0 | ||
|
8d4ef982d0 | ||
|
4740156cfa | ||
|
f8e7f75831 | ||
|
085137ca63 | ||
|
3fd2a83184 | ||
|
66c1281301 | ||
|
73947d9eca | ||
|
a69c0b2718 | ||
|
6c764bceeb | ||
|
273aeb7bae | ||
|
d185858266 | ||
|
453bd6064b | ||
|
904f2587cd | ||
|
14172312dc | ||
|
c600d7aa47 | ||
|
3c9234078a | ||
|
ee8e0497ae | ||
|
fd5d121648 | ||
|
30bdb4b4e9 | ||
|
2ee492fb74 | ||
|
36a5c8b44c | ||
|
07b625c58d | ||
|
746a865106 | ||
|
f85ec28a16 | ||
|
0307c55f9f | ||
|
d05c916491 | ||
|
c73e43f5c9 | ||
|
e61e7f44b9 | ||
|
f9b6ac03c6 | ||
|
296ccc5f8e | ||
|
8cb5825617 | ||
|
cee707abd8 | ||
|
92717a4832 | ||
|
37a7296759 | ||
|
ebbe49d17b | ||
|
b7e330855f | ||
|
ac89224fb0 | ||
|
9ec262ae00 | ||
|
64acdb5f2a | ||
|
a175b36382 | ||
|
16526d283c | ||
|
752e677555 | ||
|
f796af1ae8 | ||
|
2515993536 | ||
|
66b3e71e56 | ||
|
652d151373 | ||
|
300b1a1b84 | ||
|
6c3b49417f | ||
|
dcc5cfb7c0 | ||
|
d970836605 | ||
|
8021bd0aae | ||
|
8f91156d80 | ||
|
fab36d6e63 | ||
|
3d54879c14 | ||
|
e17fcde865 | ||
|
6950dffcb4 | ||
|
02dd5c5853 | ||
|
5a1087dbf9 | ||
|
7564dfeb7a | ||
|
10bad635a8 | ||
|
7cc8b0fed5 | ||
|
a77846373b | ||
|
bcd0dabb92 | ||
|
9d68062553 | ||
|
e66d0b7431 | ||
|
ba26f6ce84 | ||
|
929caed0b9 | ||
|
8340aa2b6c | ||
|
1188fe3bf0 | ||
|
b15f47d80e | ||
|
ef261cbbd7 | ||
|
06997ff255 | ||
|
9d7df87886 | ||
|
a4b2323ca3 | ||
|
e8de468b0b | ||
|
d83a3bf4e2 | ||
|
f2b39ad055 | ||
|
95d1bd98e4 | ||
|
8acbc6a6b4 | ||
|
467ba5be20 | ||
|
29ddf9e61d | ||
|
92e119cab3 | ||
|
92049ba8e4 | ||
|
54330b9921 | ||
|
d1aeb030f2 | ||
|
f89274d1ea | ||
|
7286596fb4 | ||
|
a2fc83d94e | ||
|
2161799cc3 | ||
|
c88f132057 | ||
|
c6313a5906 | ||
|
eadcdb5bed | ||
|
6e7649b5f7 | ||
|
d986f90074 | ||
|
53c575db3f | ||
|
6dcc20038c | ||
|
fa145f632b | ||
|
785d8deadd | ||
|
93a2d5afbf | ||
|
d60c6b18d4 | ||
|
d1e02569f4 | ||
|
9ce67029ca | ||
|
98f3382cea | ||
|
52a0255814 | ||
|
eb59ac8535 | ||
|
acc2b4e10f | ||
|
6f830f0e08 | ||
|
6edc318597 | ||
|
182c0cf28e | ||
|
0b105b5986 | ||
|
081c7d22bc | ||
|
6aee896657 | ||
|
cae1bad274 | ||
|
1b8b97b8ec | ||
|
0846606b12 | ||
|
245ebcdfc6 | ||
|
1b1e711c93 | ||
|
c088c25b09 | ||
|
958d64720e | ||
|
805afad4fe | ||
|
400f4840ad | ||
|
ee7792596d | ||
|
0081328aca | ||
|
3fff7f6878 | ||
|
f15dd06473 | ||
|
dd26819d66 | ||
|
562012fb22 | ||
|
a6d257df5b | ||
|
41d896ba3e | ||
|
51cdf46645 | ||
|
1eb0adf6d3 | ||
|
40c9d2050f | ||
|
3a325845c7 | ||
|
6a1bfd6270 | ||
|
b91ea1d7ca | ||
|
c5760cd535 | ||
|
91c7960800 | ||
|
2079a5574b | ||
|
27ffb8fa8a | ||
|
22c8fb3f59 | ||
|
964419803a | ||
|
6271d5d544 | ||
|
175bd310f5 | ||
|
67ee6f4126 | ||
|
01b312f14c | ||
|
18638c62de | ||
|
753d000788 | ||
|
19e781b104 | ||
|
aa2df327db | ||
|
321fa94b8f | ||
|
ca80957143 | ||
|
41cdb8f71b | ||
|
304d4c9acf | ||
|
7fd4c092e3 | ||
|
2fe5705542 | ||
|
e968365858 | ||
|
36ad4c7466 | ||
|
5a62b3058f | ||
|
ec8fe9f031 | ||
|
a1df9e886a | ||
|
ce5ae1931d | ||
|
b9445d4f62 | ||
|
fd1eec99b5 | ||
|
e162406d40 | ||
|
c41e68aaab | ||
|
4665af6c42 | ||
|
c0b15427fe | ||
|
f29a9d972d | ||
|
b192373ae7 | ||
|
c042650382 | ||
|
07a916a720 | ||
|
5134044530 | ||
|
4c62065e74 | ||
|
d28fc99119 | ||
|
c9fb45c85f | ||
|
7c6e18f7a7 | ||
|
176b9c9666 | ||
|
ecd9c35233 | ||
|
71e32c57d9 | ||
|
2fefd0e4e3 | ||
|
81eaf79a25 | ||
|
8dea7217a6 | ||
|
c0fb9e17e8 | ||
|
83776a8dce | ||
|
98c0972619 | ||
|
56d832d661 | ||
|
de3a350afe | ||
|
708a741960 | ||
|
0caef1b307 | ||
|
264e2db539 | ||
|
734996002c | ||
|
205e5016e8 | ||
|
a0f28a7f9b | ||
|
14b7680328 | ||
|
f44222ce53 | ||
|
f310ad8d98 | ||
|
d56b394bcc | ||
|
55c9501e57 | ||
|
fad9575154 | ||
|
97e11e1ac9 | ||
|
e6a1e164b2 | ||
|
e4f8f81e89 | ||
|
176b3bb526 | ||
|
b07752fa9b | ||
|
68fd0efbde | ||
|
c80d3eb812 | ||
|
f9320995d6 | ||
|
f592c9f04d | ||
|
bd7970fb1f | ||
|
c47730f2cc | ||
|
41cccfd2aa | ||
|
aff690f7d6 | ||
|
d4b0539d39 | ||
|
cb55273769 | ||
|
fbad625126 | ||
|
e49ba887e9 | ||
|
edc1acbb7e | ||
|
d304c10641 | ||
|
302c53a8e7 | ||
|
ef02b712ad | ||
|
aca217cf9a | ||
|
9e3386dbbb | ||
|
fdec565b34 | ||
|
33c2188c87 | ||
|
b3c8fa74cc | ||
|
e53bb70bef | ||
|
109bec372c | ||
|
5c2561d05d | ||
|
0e970b8037 | ||
|
1694b4d6ef | ||
|
396299c1db | ||
|
71d789aab0 | ||
|
41ca50ff0e | ||
|
6d2e14a656 | ||
|
4078708aea | ||
|
343ea9c6d8 | ||
|
60361f88ed | ||
|
f7560cb1d8 | ||
|
1f66568d59 | ||
|
7af07cef95 | ||
|
41a540a629 | ||
|
f599944942 | ||
|
1e06aee6a2 | ||
|
7bbaedef97 | ||
|
87048511fe | ||
|
c770f0b68b | ||
|
78c00ad512 | ||
|
a19879d494 | ||
|
ac1aca36b0 | ||
|
1f3b89cf28 | ||
|
f732f6ae6f | ||
|
0b9f3d1751 | ||
|
0806aa6dfe | ||
|
32436d099c | ||
|
4ce692ccaf | ||
|
3caa4eed75 | ||
|
c70f508ae8 | ||
|
9e64d7aaf9 | ||
|
72b773f06d | ||
|
5f978b865b | ||
|
57a4f92722 | ||
|
87351e89ca | ||
|
7920c67a48 | ||
|
192e356169 | ||
|
31232e49fb | ||
|
116595d218 | ||
|
9d93a31755 | ||
|
9f7df59945 | ||
|
d2418521a7 | ||
|
9330b5b1d9 | ||
|
faa50bf578 | ||
|
f0d9618dfc | ||
|
310a880fa8 | ||
|
fc6e3b6da0 | ||
|
50771045d0 | ||
|
8f522470ed | ||
|
dc90c9ac65 | ||
|
e46e174b59 | ||
|
7f3f3f577c | ||
|
75d87c73d1 | ||
|
0fe44c9bf2 | ||
|
7a1d20ed0a | ||
|
70c83b60a1 | ||
|
7ba296ccdf | ||
|
0b112cb4d4 | ||
|
68ac8d3e09 | ||
|
f6fa8bd722 | ||
|
6aee27a3f1 | ||
|
401568033c | ||
|
ea73be6831 | ||
|
ba8a75c84b | ||
|
a1f3ccdd6d | ||
|
647d38007f | ||
|
e7dd28b926 | ||
|
b5fc9673d9 | ||
|
a065040323 | ||
|
dec3b1092d | ||
|
407915a86e | ||
|
c488599879 | ||
|
bcecc93e39 | ||
|
ff7d1a250e | ||
|
70f338c3de | ||
|
c847d83011 | ||
|
5ce46a61d4 | ||
|
775974d5ec | ||
|
c7af40c368 | ||
|
00a974a721 | ||
|
7428ba2dd7 | ||
|
b37223c053 | ||
|
24051fec03 | ||
|
f811a29f87 | ||
|
acf7bcc7a6 | ||
|
9707366348 | ||
|
9e5fe71f5b | ||
|
5d1b7da728 | ||
|
3ea1ddae22 | ||
|
1694e9c78c | ||
|
4763077b76 | ||
|
c0eaca220c | ||
|
25d086c4e1 | ||
|
88551043cd | ||
|
f779f760c4 | ||
|
f18f82e229 | ||
|
67ef2b45fa | ||
|
d72e871265 | ||
|
4c9bc13309 | ||
|
84563b0d46 | ||
|
c5aab7e8db | ||
|
1755b646b8 | ||
|
85f906ea53 | ||
|
e1a155a9c8 | ||
|
0454447e41 | ||
|
7b40c0bbee | ||
|
dc773c5c20 | ||
|
b6253b03c2 | ||
|
a5bc29245b | ||
|
bfae478251 | ||
|
a7cd490593 | ||
|
283d2caa81 | ||
|
dd8fb04886 | ||
|
ce8dca7659 | ||
|
5bd3934d22 | ||
|
128f550ee5 | ||
|
3a56a06c4f | ||
|
126ac3816f | ||
|
d02bed1a55 | ||
|
b7ed0ce537 | ||
|
a742503508 | ||
|
33312ab09e | ||
|
c422f0b9fb | ||
|
7797de80e3 | ||
|
c92f1b8df8 | ||
|
bde68ba48b | ||
|
bf44805e69 | ||
|
edf9aa2c23 | ||
|
4801c47273 | ||
|
bcad6492d6 | ||
|
5ab2a22e88 | ||
|
d7bd540683 | ||
|
62954f98de | ||
|
722b56c8ca | ||
|
f3f12058dc | ||
|
3da38d0483 | ||
|
d715b1f9ca | ||
|
e13fa25e11 | ||
|
34d4834ff6 | ||
|
117ddd17d7 | ||
|
6f2bf38f0e | ||
|
320882c34a | ||
|
8bbb533c9a | ||
|
3113b5a551 | ||
|
c0cc05177f | ||
|
0b34d43804 | ||
|
6c7d7427bf | ||
|
54ed3b86ba | ||
|
2001d0f707 | ||
|
0f9cd9c87d | ||
|
2e7db61808 | ||
|
5329968155 | ||
|
2e536e3141 | ||
|
cf935a5331 | ||
|
fa81e5b8ee | ||
|
9fea22b90d | ||
|
0889b6d247 | ||
|
1328bc4938 | ||
|
787a20cbaa | ||
|
a73f1ab0ac | ||
|
30c0dad3ae | ||
|
440e097d78 | ||
|
b74a3ebd85 | ||
|
45987a1d98 | ||
|
8f0950fc44 | ||
|
30bc3f9281 | ||
|
f655ec5a5c | ||
|
84b4158555 | ||
|
91dd45cf64 | ||
|
37af60254f | ||
|
f0e736d4ea | ||
|
e3ebb0c5bb | ||
|
6382f779c3 | ||
|
8ef34883a8 | ||
|
5c46af0edb | ||
|
b6511fbfe2 | ||
|
eccb12b366 | ||
|
d0982e7ba5 | ||
|
fc30da0d48 | ||
|
e6a1052ba7 | ||
|
968dce50fc | ||
|
3bbffd3c22 | ||
|
072b5fcd12 | ||
|
fabffa80f0 | ||
|
064265b0b9 | ||
|
2d5d11645d | ||
|
cc813a5624 | ||
|
156c067f79 | ||
|
b33b013d41 | ||
|
e74e2f705f | ||
|
2e438847fc | ||
|
9301c47d93 | ||
|
20ec58b07f | ||
|
98983c1015 | ||
|
67195592c6 | ||
|
21abfc6424 | ||
|
d1e88df71e | ||
|
f36cfe34ab | ||
|
8a1934008c | ||
|
b41bb9cfcf | ||
|
d58324bbef | ||
|
cbbd45d3e5 | ||
|
b89633ae4b | ||
|
96357e9bfd | ||
|
d530c3096f | ||
|
ae0c1c3f2d | ||
|
0cc2564380 | ||
|
38d20022ad | ||
|
280132dad0 | ||
|
61de4e2789 | ||
|
f9d3455320 | ||
|
2ff64c3c12 | ||
|
902f235b5b | ||
|
97d43a6fa2 | ||
|
9bc65ff0ca | ||
|
6cd6a615fd | ||
|
5639f0653d | ||
|
251174c9a2 | ||
|
42ea87d637 | ||
|
7c8a60b8cf | ||
|
2f4500be5a | ||
|
fa7534a362 | ||
|
a258f0af7a | ||
|
01621c6344 | ||
|
c7934342a6 | ||
|
f5c324c06b | ||
|
f615cf2391 | ||
|
c984a97262 | ||
|
a02c06a837 | ||
|
21d6aa421c | ||
|
e5d274fe1c | ||
|
166a391eae | ||
|
5fb24bb27f | ||
|
5a5430b383 | ||
|
67a1e1c874 | ||
|
4155fbe94c | ||
|
347fe6b7be | ||
|
ff3ceb981e | ||
|
1dafa0c74d | ||
|
09917837d0 | ||
|
dd507a3808 | ||
|
e67dcf4d68 | ||
|
dd9406d0ac | ||
|
6a80ac62a5 | ||
|
98efb08e17 | ||
|
f58a9f46be | ||
|
fd77e62a13 | ||
|
376228e199 | ||
|
8a5b853fae | ||
|
1cbf23e7e7 | ||
|
63554ba171 | ||
|
5de37cb820 | ||
|
e5cee1f46d | ||
|
e9a01caa5c | ||
|
858357a246 | ||
|
ef16502159 | ||
|
29e2c43e01 | ||
|
7aa2f80117 | ||
|
d29f9c4ffd | ||
|
7617b4cbc2 | ||
|
e0c769fd19 | ||
|
ebd10a5f28 | ||
|
2b77184281 | ||
|
e23976f6c4 | ||
|
0b8dc02eba | ||
|
fde1d0677e | ||
|
48986574ae | ||
|
c7a6a71d07 | ||
|
1847845151 | ||
|
7cb92195d1 | ||
|
72afa0341f | ||
|
0152004c42 | ||
|
30ca5046b5 | ||
|
8e9698c9a0 | ||
|
3047e2dd7c | ||
|
a8b9d21f2d | ||
|
c77a5b7cb6 | ||
|
23f2068e33 | ||
|
ffadfb4149 | ||
|
b7e38cfbae | ||
|
659743b39c | ||
|
cbac42bdd1 | ||
|
69758c5859 | ||
|
81bfd7e5fb | ||
|
fd8a5e695d | ||
|
8f74dbdbb4 | ||
|
fd5a7eac87 | ||
|
6bac3c75cb | ||
|
5d6e0e3790 | ||
|
2871a326e6 | ||
|
abb42f0f36 | ||
|
f6fcb04817 | ||
|
b8796d825d | ||
|
e97259aca3 | ||
|
88f49834fd | ||
|
4415f52e18 | ||
|
98d742d634 | ||
|
6c1ca10be7 | ||
|
aeaf2d546a | ||
|
c7cb6664b4 | ||
|
79adba9284 | ||
|
37b7f52f2c | ||
|
c89e0ab255 | ||
|
f613f4f2df | ||
|
a497e4c920 | ||
|
0f637fb722 | ||
|
ba48c8e25b | ||
|
abbadc92a0 | ||
|
97fcbdd6d9 | ||
|
d7686b665e | ||
|
b8855afd10 | ||
|
5de41a3a7f | ||
|
84cdac83d6 | ||
|
436a55ee1e | ||
|
313cc2965c | ||
|
95f74c5ea7 | ||
|
8d1c3c754d | ||
|
72152f9d80 | ||
|
ebd365a128 | ||
|
c130d7cf5f | ||
|
0406e76889 | ||
|
c2b28c0f8d | ||
|
9aa5038756 | ||
|
a860f8f1a8 | ||
|
487c016a32 | ||
|
e4bddb4993 | ||
|
731afcb864 | ||
|
efb73ff4e7 | ||
|
2ed2f35a9b | ||
|
119151cad3 | ||
|
758f9b5aa5 | ||
|
e06a8c1de2 | ||
|
29ce8ca0cf | ||
|
eb4158df0b | ||
|
12fda1a36b | ||
|
e927f99777 | ||
|
044bcf55bd | ||
|
e475af9f49 | ||
|
c6abcd91fa | ||
|
10fc489822 | ||
|
d76d926c38 | ||
|
2b3c167845 | ||
|
1d75b974b5 | ||
|
584bb3a648 | ||
|
7b5ec6b98f | ||
|
23526f6d1a | ||
|
c0930ead0f | ||
|
809b3ee023 | ||
|
23f0c79fba | ||
|
81dd3809e9 | ||
|
2bf0c4497d | ||
|
93122bdd18 | ||
|
978550f809 | ||
|
84fea0fd05 | ||
|
2df3e0f881 | ||
|
c98117f69d | ||
|
ede5d1f890 | ||
|
39911e3acd | ||
|
3d1c15ef99 | ||
|
f718482e98 | ||
|
8dafd13cd7 | ||
|
0b19b28a64 | ||
|
c245f7ce3a | ||
|
607d647483 | ||
|
9a38a455c9 | ||
|
16e0738731 | ||
|
eacbf87979 | ||
|
108b4cb648 | ||
|
a9dff407a1 | ||
|
9e26109e36 | ||
|
6308a8dfcd | ||
|
4baf9527d7 | ||
|
199c459697 | ||
|
61288c5e68 | ||
|
8375237de5 | ||
|
c8d820c17b | ||
|
6319b8ef51 | ||
|
397a85eaa4 | ||
|
3889c4bdd9 | ||
|
c899f1cb85 | ||
|
d8956c51d0 | ||
|
5dd55c7cad | ||
|
c0e61d4c87 | ||
|
97e17282ab | ||
|
94c882af7d | ||
|
89c6d85f2f | ||
|
cf366c602f | ||
|
77ccab7d80 | ||
|
f51ba63742 | ||
|
9044518be5 | ||
|
9e0367eef4 | ||
|
235bb6c1b9 | ||
|
49344d7ea8 | ||
|
1b418d77ff | ||
|
80cc302627 | ||
|
8e1abc3f10 | ||
|
e498c6907a | ||
|
08e8fc6736 | ||
|
f6e9ef6de9 | ||
|
c51159672e | ||
|
233b51e29e | ||
|
54c8e13a68 | ||
|
405300b4b2 | ||
|
a6abd31ead | ||
|
4c26674ff4 | ||
|
40768e935b | ||
|
23be648456 | ||
|
13ee31770a | ||
|
93dc80000c | ||
|
e0cd3cd991 | ||
|
81ae501e73 | ||
|
9b781f8404 | ||
|
f797a92f87 | ||
|
0a579814a2 | ||
|
ec6c9bca62 | ||
|
a433bbbe45 | ||
|
8ca20f184d | ||
|
d160954080 | ||
|
14372e0ef0 | ||
|
03bffa27ac | ||
|
028b5a4f0d | ||
|
cd12f49fc0 | ||
|
a144749a8d | ||
|
1bd146fb8e | ||
|
5f6c3da7a4 | ||
|
d0aa754252 | ||
|
dbe9235f3a | ||
|
d78569986b | ||
|
95323e6caa | ||
|
f809d22fc6 | ||
|
763d61db8d | ||
|
10cad3abb2 | ||
|
9338f35cd8 | ||
|
ead6fa9daa | ||
|
ad660cf420 | ||
|
75f8ae2815 | ||
|
70aa04c047 | ||
|
4aa47e87f2 | ||
|
f8050816ac | ||
|
5b0a6d7ec1 | ||
|
3b4d08f52b | ||
|
6bbf40d7d2 | ||
|
d895f83520 | ||
|
f6b9e8c5eb | ||
|
98bcdf6028 | ||
|
9b385ec7cc | ||
|
5c040f7a46 | ||
|
46232c7fd4 | ||
|
c67d95c00f | ||
|
5e5aaf9a7e | ||
|
35996d0adb | ||
|
eaeb23d41e | ||
|
c71f6ad417 | ||
|
87a8593291 | ||
|
4799dd769e | ||
|
24b4606f96 | ||
|
9f672a0cf4 | ||
|
064bc5ee76 | ||
|
a52d78c8ee | ||
|
a00cabe223 | ||
|
dbe974f510 | ||
|
a284682deb | ||
|
07d7507ac6 | ||
|
c68d17d482 | ||
|
9e185e80ce | ||
|
676e7c7947 | ||
|
04212b2cef | ||
|
bafc2a1f30 | ||
|
563e388a45 | ||
|
d31d8ec5b0 | ||
|
2b00cd632d | ||
|
5f427d2b4c | ||
|
8c0ce4fc1d | ||
|
10a74f45ea | ||
|
320dad7f1a | ||
|
88ac72c8eb | ||
|
f74b9df0a7 | ||
|
a6f1335375 | ||
|
f321fa5ad3 | ||
|
03d999444d | ||
|
763ed260c3 | ||
|
764e7d1315 | ||
|
048f685073 | ||
|
e4d7958379 | ||
|
bdcbfb11a8 | ||
|
3f288e264b | ||
|
dd593c292c | ||
|
fa87c7e1b7 | ||
|
39c1857c61 | ||
|
c57a2d0dc3 | ||
|
a2e6616100 | ||
|
ba4513e82c | ||
|
6525b16e1f | ||
|
b6a92506d1 | ||
|
ffa0366deb | ||
|
00c4686ef0 | ||
|
3101b74580 | ||
|
4e694fdff6 | ||
|
194a6057dd | ||
|
e710e057e2 | ||
|
28188a6e59 | ||
|
70a5df96c8 | ||
|
460998d512 | ||
|
e741301417 | ||
|
5ed5298409 | ||
|
b911665691 | ||
|
56eb83319d | ||
|
1e6800565a | ||
|
c909120ae1 | ||
|
9894f37412 | ||
|
229c63c46d | ||
|
6a04cdfddf | ||
|
c70670bacb | ||
|
7bb3e44a76 | ||
|
b958acb76a | ||
|
b22f4fbb72 | ||
|
e8c0648e04 | ||
|
ebc84c22fb | ||
|
8bd9a00c38 | ||
|
972d03efdf | ||
|
aa0d256d6a | ||
|
4d75fa2908 | ||
|
1a05cba60a | ||
|
bf92c270dc | ||
|
e507844616 | ||
|
ca12dd59f7 | ||
|
6f222b9800 | ||
|
fca62f261e | ||
|
c7f0276005 | ||
|
46409c4c2d | ||
|
46df58d28b | ||
|
15912f31d0 | ||
|
dd380a5fb3 | ||
|
93f49f1fb3 | ||
|
b83bb5a48a | ||
|
704de50a9b | ||
|
fcfe07fb7d | ||
|
ccf4990add | ||
|
f2638dd845 | ||
|
239980ecae | ||
|
6cb784df75 | ||
|
efee904531 | ||
|
bee815b1c4 | ||
|
e296b02649 | ||
|
2656fcfe2c | ||
|
c019a029ec | ||
|
db0216936e | ||
|
46d761f34f | ||
|
4598c7f40f | ||
|
1d486bddee | ||
|
606db54dc8 | ||
|
d8073f0dde | ||
|
df85468c01 | ||
|
4404ad98ae | ||
|
e7192a9cad | ||
|
019b61b330 | ||
|
f997707049 | ||
|
c56ee10185 | ||
|
8210e49b4e | ||
|
e51bf8619d | ||
|
69b28fd07d | ||
|
99884c2c7e | ||
|
a8f2e9ee2c | ||
|
a91b909103 | ||
|
d6b8b38955 | ||
|
99e031c529 | ||
|
998f239ed9 | ||
|
0961f627b1 | ||
|
6483308bb0 | ||
|
a42f707b2d | ||
|
eef37927ba | ||
|
7440da240d | ||
|
d0239368e2 | ||
|
4f8048be31 | ||
|
807fb2d052 | ||
|
ce293029c7 | ||
|
b5ed21be21 | ||
|
251fc63b42 | ||
|
47f3855a4b | ||
|
71dfe9f33e | ||
|
afad4f5ebb | ||
|
4ab1cd9502 | ||
|
52e2ab45bf | ||
|
be444f9172 | ||
|
715d61dfea | ||
|
bf37a3eb25 | ||
|
c2b45bec8d | ||
|
cdfe284f9a | ||
|
08eed17e66 | ||
|
00eb8b90dc | ||
|
912129311d | ||
|
624b78ec3a | ||
|
1d0cea1d55 | ||
|
f01f608474 | ||
|
c22feaf42e | ||
|
63e857f7cd | ||
|
9979c9defe | ||
|
7763df0715 | ||
|
e088eb9ec8 | ||
|
19402772fc | ||
|
ba724bc1b2 | ||
|
8de3e6ab80 | ||
|
659d2134ba | ||
|
867410c66b | ||
|
483c2dbb44 | ||
|
e5c9791b14 | ||
|
58556af6c7 | ||
|
2e29038ecd | ||
|
36a23707c1 | ||
|
c1ea60b399 | ||
|
b08e302dd5 | ||
|
ea66195b97 | ||
|
86a5cc5c5f | ||
|
8f0cbf267b | ||
|
2f8488610a | ||
|
d95f01b701 | ||
|
c9d7635370 | ||
|
6b5fb0f841 | ||
|
12bd74d4f3 | ||
|
37c4cc68ed | ||
|
1c948eb3d8 | ||
|
cd90ca820f | ||
|
9786f82220 | ||
|
6f4e767a04 | ||
|
5411950b87 | ||
|
6ff7e9648f | ||
|
5c071ce4d3 | ||
|
caf3d231a8 | ||
|
730e8f74e4 | ||
|
aba134284f | ||
|
2a6183f9e0 | ||
|
ee143bbc48 | ||
|
d3f01bd171 | ||
|
05ba3bab96 | ||
|
d2b6b2044c | ||
|
7611b7900d | ||
|
9ad32ee9c7 | ||
|
866db6c63f | ||
|
01476577b8 | ||
|
e237df4a10 | ||
|
f11103d31d | ||
|
9288d311d4 | ||
|
77d5e39fe0 | ||
|
27e781761d | ||
|
92cac52813 | ||
|
66bb12e55a | ||
|
a5d980ee56 | ||
|
19c2ceec9b | ||
|
507f26ad47 | ||
|
fd44e09ebd | ||
|
09fd0a1d0e | ||
|
667b0ca0b0 | ||
|
a56953c798 | ||
|
7470c170b1 | ||
|
bc330acfc9 | ||
|
789e8eea85 | ||
|
35b29e4f9e | ||
|
69f333c0bf | ||
|
c069c8c182 | ||
|
9e4aa7da7c | ||
|
e22e65eee4 | ||
|
cb55c76664 | ||
|
d6b07e4d01 | ||
|
995657c6ce | ||
|
58f2f86ea8 | ||
|
7bc1cff286 | ||
|
8f455f3b6d | ||
|
f91d92cccb | ||
|
08ca6399ec | ||
|
c0b5ea0e7d | ||
|
f21a3983aa | ||
|
f6e2216b87 | ||
|
92ed513e4f | ||
|
d7ab21fe34 | ||
|
bca4bbb6c8 | ||
|
e618aa34e9 | ||
|
6e41e78f36 | ||
|
c4dd9a0547 | ||
|
5ec10634d8 | ||
|
cdae74d395 | ||
|
8b74e3aa0d | ||
|
23169ad818 | ||
|
d36e36c8fd | ||
|
948d4d5f08 | ||
|
0960e18f8e | ||
|
825fd10efa | ||
|
1ec6f9cde2 | ||
|
a5118fe8f1 | ||
|
6c88f00a9d | ||
|
bf783dad7a | ||
|
8a53e107fa | ||
|
0ed938545b | ||
|
480abfe966 | ||
|
89e4343fdb | ||
|
8c16a2aede | ||
|
5deec63667 | ||
|
363368b150 | ||
|
74caf9e38a | ||
|
7087ab5f07 | ||
|
0b0cf48849 | ||
|
00d9773b44 | ||
|
ac2d7034db | ||
|
88b9ec70c6 | ||
|
77261a38cd | ||
|
3c7c77fe21 | ||
|
4ee3f6ba3f | ||
|
4c016b0318 | ||
|
f59cab300e | ||
|
ec7826659a | ||
|
98b5f22104 | ||
|
2283ceb77d | ||
|
fba466d6e2 | ||
|
cbbf60a599 | ||
|
c125d8ab48 | ||
|
f03146de4b | ||
|
dbb758d1a8 | ||
|
da8bcc6e24 | ||
|
96eecc6ea5 | ||
|
74644d59f3 | ||
|
0f9b90eb1c | ||
|
ae9537b68e | ||
|
2619d196bb | ||
|
17db23c2c1 | ||
|
040bea1f75 | ||
|
dc8277223a | ||
|
98d1898610 | ||
|
1400fb4a9b | ||
|
647bbfa617 | ||
|
b73fcc19fe | ||
|
d9e6c4f266 | ||
|
34653f03a2 | ||
|
f0a8ca440f | ||
|
d89db10645 | ||
|
413dc6ced4 | ||
|
78f21dd19a | ||
|
2cb209ae9c | ||
|
979a620ead | ||
|
7a17933c65 | ||
|
019fa763cd | ||
|
097a163cf5 | ||
|
2ae0b8c159 | ||
|
31ae71c7d6 | ||
|
5ce894564c | ||
|
813fa08bdd | ||
|
e5792ba8b3 | ||
|
62cc9df206 | ||
|
42375f0e53 | ||
|
24dce8c03b | ||
|
eda615de0f | ||
|
a000256223 | ||
|
9bd0e3ce58 | ||
|
b4d1e0e81e | ||
|
d2fdaafc7a | ||
|
7d86586594 | ||
|
11c26e700e | ||
|
8274e8a953 | ||
|
42afe490b7 | ||
|
0f34beb1aa | ||
|
e853483ef3 | ||
|
baff83912e | ||
|
8e25cfff4f | ||
|
b7dc748942 | ||
|
d71124961e | ||
|
fbdedf53de | ||
|
a6a66c6d8a | ||
|
d167ad2017 | ||
|
7d741ff499 | ||
|
f8f9f04158 | ||
|
b0c7480d06 | ||
|
e7af77e151 | ||
|
ec940e36d0 | ||
|
f92d8a0975 | ||
|
ed373eef61 | ||
|
5abaf13192 | ||
|
d86e8522e2 | ||
|
bd2c3855ed | ||
|
4c627d0e1d | ||
|
c8dd45e37d | ||
|
54c2be893b | ||
|
55c65f0935 | ||
|
b5ef67ed28 | ||
|
f140e7d7c7 | ||
|
a9a2960e86 | ||
|
7326ba74fe | ||
|
a9fabba407 | ||
|
5d862d119c | ||
|
4fc0ddbc45 | ||
|
9455100907 | ||
|
32a6735d03 | ||
|
f0b4acb358 | ||
|
67c15a34e6 | ||
|
9579cdd151 | ||
|
443cf0cf1e | ||
|
4138233ddf | ||
|
2979f4703e | ||
|
77f2ca51af | ||
|
ffcbc6c1c9 | ||
|
186a02acfd | ||
|
6f2a7977c1 | ||
|
266ad2e4de | ||
|
d1a004bea6 | ||
|
e4372289a5 | ||
|
379bccc1a3 | ||
|
21125206b4 | ||
|
44b1fe0e6d | ||
|
88399e30e2 | ||
|
7ed3306be3 | ||
|
eb2ca942d5 | ||
|
2afbdc2269 | ||
|
d82a858491 | ||
|
e332faa07e | ||
|
4e9e79454f | ||
|
b0bc07b4e7 | ||
|
2cda57355a | ||
|
fd192d2791 | ||
|
6814c90625 | ||
|
a57ab427b3 | ||
|
1e184a8372 | ||
|
6fae51a8ef | ||
|
a9f7b4c457 | ||
|
1e3b6934bb | ||
|
0a5e85be8f | ||
|
7694a15f62 | ||
|
d72da01a92 | ||
|
112f43b3a1 | ||
|
f12c6fd57e | ||
|
96bac70b85 | ||
|
619fb8ba80 | ||
|
5a5cdaf70e | ||
|
bb587ca47f | ||
|
4d298cd5fa | ||
|
fbbaf584ba | ||
|
df1850bd45 | ||
|
d42ab19166 | ||
|
2ab26f37b8 | ||
|
ec7ce7b0b3 | ||
|
3e9b37c264 | ||
|
502713f7a8 | ||
|
e19256a6b6 | ||
|
ccc41d1717 | ||
|
e49b1dd155 | ||
|
fe419b12b4 | ||
|
810515c08d | ||
|
535a51a621 | ||
|
a278fc6296 | ||
|
f1c6525a50 | ||
|
ace0d19973 | ||
|
40b8c8c128 | ||
|
716ab35b4e | ||
|
3fb249758e | ||
|
f7a6ef2179 | ||
|
105d93cd85 | ||
|
1e4157017d | ||
|
5fb75adaae | ||
|
affcf8cf41 | ||
|
cc4e089a5d | ||
|
32b9c2e671 | ||
|
4d05be4095 | ||
|
137adb9c3c | ||
|
16e37672fc | ||
|
d0c72ceb7e | ||
|
0fcb2b534c | ||
|
dcf6218cdb | ||
|
8f51345a1d | ||
|
03bd892b95 | ||
|
1e65ac3940 | ||
|
e622437560 | ||
|
30584887f9 | ||
|
17ca4f9eea | ||
|
7fb3db3249 | ||
|
b60fcd0918 | ||
|
862e925d7c | ||
|
a0027ad32b | ||
|
c5f4cb34bf | ||
|
2e69179f12 | ||
|
19000ab339 | ||
|
be3ba3ef37 | ||
|
ac1ac3ea57 | ||
|
0b505939ed | ||
|
d2a9e1b644 | ||
|
3464ca514b | ||
|
2f2c86a9f5 | ||
|
45dd9fea25 | ||
|
c974d72e7e | ||
|
e3675d2fa9 | ||
|
c4a6bf7672 | ||
|
5edc0c8d52 | ||
|
cb6865924e | ||
|
ee50f7422d | ||
|
964014860a | ||
|
7c58ddce81 | ||
|
611ba2d35a | ||
|
ecd6ed186f | ||
|
b07f84bc01 | ||
|
ad2e939018 | ||
|
2a90ade80f | ||
|
d9c456d772 | ||
|
b2599a6d33 | ||
|
38fd49b271 | ||
|
85fea2ecaa | ||
|
1b9ae7b42d | ||
|
46f81aca2f | ||
|
ca22c287a5 | ||
|
1bb1248ab0 | ||
|
624e8acd41 | ||
|
b7190ebc69 | ||
|
a74f899d28 | ||
|
8e2225e346 | ||
|
29c76fcdce | ||
|
55d0fa61d7 | ||
|
554a7fde80 | ||
|
bdd47ecd03 | ||
|
72115e490f | ||
|
bdd2b4a43e | ||
|
3402b31c30 | ||
|
0682550bd2 | ||
|
6e89377dea | ||
|
950c49d80f | ||
|
8def95e849 | ||
|
43430728aa | ||
|
6a20b2b678 | ||
|
3675c7a090 | ||
|
2eb972dea1 | ||
|
449471a076 | ||
|
5ef17a2a20 | ||
|
0ecab53635 | ||
|
d82532b7f1 | ||
|
281f1322a9 | ||
|
347f16939c | ||
|
6e1ddca293 | ||
|
8b8fc49901 | ||
|
73eaa0865d | ||
|
616effdb3c | ||
|
4cec89da91 | ||
|
2e2916cebe | ||
|
6d939175b1 | ||
|
73e412ea5b | ||
|
c2f9980eba | ||
|
0532e8c40e | ||
|
919b80b9ab | ||
|
1aee6fdc11 | ||
|
a62015d5f3 | ||
|
722ff3bffb | ||
|
2bc212d65c | ||
|
efb46cc703 | ||
|
8fb531c614 | ||
|
9ece07d559 | ||
|
0ae4731cf1 | ||
|
02db999762 | ||
|
2a25b5e8a9 | ||
|
5c1a59257c | ||
|
d4010c76cf | ||
|
6fb0f77eea | ||
|
1252f95da5 | ||
|
f3babde415 | ||
|
ad1be7c835 | ||
|
c47eb25483 | ||
|
58fcddedbb | ||
|
11af3f3e64 | ||
|
549d323f6d | ||
|
a2885acdf4 | ||
|
bd84c73e05 | ||
|
04f501b8c8 | ||
|
be040419f3 | ||
|
384de2e54b | ||
|
43f3380cb9 | ||
|
bce452fb4f | ||
|
0553174401 | ||
|
2d066af5b9 | ||
|
b945fd7f39 | ||
|
f19c9a2863 | ||
|
87767b14bd | ||
|
fe0d754f2c | ||
|
fd1b56dbad | ||
|
ed8ec0990e | ||
|
4d94a023c9 | ||
|
96f6cd19e9 | ||
|
cf00963e57 | ||
|
4464055715 | ||
|
4972ad4c4f | ||
|
a0482273e0 | ||
|
0b7f8e1459 | ||
|
1264c64a15 | ||
|
83c32dc1a6 | ||
|
6b10413efe | ||
|
cd476dd243 | ||
|
c5ccf0681b | ||
|
bdaeb73ebb | ||
|
508cadd33f | ||
|
449c62b666 | ||
|
2423735a20 | ||
|
696f791eb5 | ||
|
7aec667cb7 | ||
|
cfbd0017e3 | ||
|
cf386e0fd4 | ||
|
9a0d1d5d4e | ||
|
592766ba65 | ||
|
036adcbe1f | ||
|
aa24e80c40 | ||
|
a061a7e1f6 | ||
|
6130908285 | ||
|
3b5002aac8 | ||
|
c7014bbc92 | ||
|
5f758cbb0e | ||
|
ef87e123ba | ||
|
3e5e537a6b | ||
|
15754d7ae8 | ||
|
91b4579edc | ||
|
03b7d7bbbe | ||
|
7d3f9c4bab | ||
|
cfd408dbbd | ||
|
d7164ea26f | ||
|
ff115f3331 | ||
|
8d1172f56e | ||
|
6aad8de316 | ||
|
e37e599703 | ||
|
d3fa7d5181 | ||
|
3cebc08826 | ||
|
7259c65052 | ||
|
b4051c35e1 | ||
|
cf1f878a39 | ||
|
83d20ccf48 | ||
|
696508034a | ||
|
ef1a39862c | ||
|
25483adf7f | ||
|
81cb6f4ea0 | ||
|
1b776b114e | ||
|
4fdaaa16ba |
14
.github/FUNDING.yml
vendored
Normal file
14
.github/FUNDING.yml
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
# These are supported funding model platforms
|
||||
|
||||
polar: marginalia-search
|
||||
github: MarginaliaSearch
|
||||
patreon: marginalia_nu
|
||||
open_collective: # Replace with a single Open Collective username
|
||||
ko_fi: # Replace with a single Ko-fi username
|
||||
tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel
|
||||
community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry
|
||||
liberapay: # Replace with a single Liberapay username
|
||||
issuehunt: # Replace with a single IssueHunt username
|
||||
otechie: # Replace with a single Otechie username
|
||||
lfx_crowdfunding: # Replace with a single LFX Crowdfunding project-name e.g., cloud-foundry
|
||||
custom: https://www.buymeacoffee.com/marginalia.nu
|
4
.gitignore
vendored
4
.gitignore
vendored
@@ -4,3 +4,7 @@ build/
|
||||
*~
|
||||
.gradle/
|
||||
.idea/
|
||||
lombok.config
|
||||
Dockerfile
|
||||
run
|
||||
jte-classes
|
6
Additional Contributors.md
Normal file
6
Additional Contributors.md
Normal file
@@ -0,0 +1,6 @@
|
||||
Not everyone shows up in the git commit history, but that doesn't mean they didn't contribute valuable changes.
|
||||
In such circumstances, their deeds will be recorded here.
|
||||
|
||||
* [@samstorment](https://www.github.com/samstorment) provided a design overhaul for [https://explore.marginalia.nu/](https://explore.marginalia.nu/) in [10cad3](https://github.com/MarginaliaSearch/MarginaliaSearch/commit/10cad3abb29b8a87bf5fd56afbc192335e3e94d7)
|
||||
via [issue #44](https://github.com/MarginaliaSearch/MarginaliaSearch/issues/44).
|
||||
* [@dreimolo](https://github.com/dreimolo) provided build script [fixes for apple silicon](https://github.com/MarginaliaSearch/MarginaliaSearch/pull/64)
|
@@ -1,42 +1,20 @@
|
||||
# Contributing
|
||||
|
||||
At present this is mostly a solo project, but
|
||||
external contributions are very welcome.
|
||||
|
||||
This is a bit of a special project,
|
||||
This is a bit of a special project,
|
||||
in part because a search engine isn't
|
||||
like a text editor that you can just
|
||||
download and tinker with; and in part
|
||||
like a text editor that you can just
|
||||
download and tinker with; and in part
|
||||
because it's as much a research project
|
||||
as it is a search engine.
|
||||
|
||||
If you have an idea for a cool change,
|
||||
send an email to <kontakt@marginalia.nu> and
|
||||
we can discuss its feasibility.
|
||||
If you have an idea for a cool change,
|
||||
email <kontakt@marginalia.nu> and
|
||||
we can discuss its feasibility.
|
||||
|
||||
Search is essentially a fractal of interesting
|
||||
problems, so even if you don't have an idea,
|
||||
problems, so even if you don't have an idea,
|
||||
just a skillset (really any), odds are there's
|
||||
something interesting I could point you to.
|
||||
|
||||
## Release and branches
|
||||
|
||||
The search engine has a release cycle of
|
||||
once per 6-8 weeks, coinciding with the crawling
|
||||
cycle. This is when model-breaking changes and changes to
|
||||
the crawler can be introduced.
|
||||
|
||||
## Running and set-up
|
||||
|
||||
There is a complementary project, wmsa.local, which
|
||||
contains scripts and instructions for running this
|
||||
code base.
|
||||
|
||||
[https://git.marginalia.nu/marginalia/wmsa.local](https://git.marginalia.nu/marginalia/wmsa.local)
|
||||
|
||||
## Documentation
|
||||
|
||||
What documentation exists resides here:
|
||||
|
||||
https://git.marginalia.nu/marginalia/marginalia.nu/wiki
|
||||
|
||||
Make sure you check out the [ide-configuration guide](doc/ide-configuration.md)
|
||||
to get your IDE set up quickly and easily.
|
@@ -14,3 +14,4 @@
|
||||
You should have received a copy of the GNU Affero General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
Note that packages under [third-party/](third-party/) have different licenses, and the code in [code/libraries/](code/libraries/) is dual-licensed under MIT.
|
||||
|
121
NGI0Entrust_tag.svg
Normal file
121
NGI0Entrust_tag.svg
Normal file
@@ -0,0 +1,121 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<!-- Created with Inkscape (http://www.inkscape.org/) -->
|
||||
|
||||
<svg
|
||||
version="1.1"
|
||||
id="svg2"
|
||||
xml:space="preserve"
|
||||
width="1600.5095"
|
||||
height="502.77777"
|
||||
viewBox="0 0 480.15286 150.83333"
|
||||
xmlns:xlink="http://www.w3.org/1999/xlink"
|
||||
xmlns="http://www.w3.org/2000/svg"
|
||||
xmlns:svg="http://www.w3.org/2000/svg"
|
||||
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
|
||||
xmlns:cc="http://creativecommons.org/ns#"
|
||||
xmlns:dc="http://purl.org/dc/elements/1.1/"><metadata
|
||||
id="metadata8"><rdf:RDF><cc:Work
|
||||
rdf:about=""><dc:format>image/svg+xml</dc:format><dc:type
|
||||
rdf:resource="http://purl.org/dc/dcmitype/StillImage" /></cc:Work></rdf:RDF></metadata><defs
|
||||
id="defs6"><linearGradient
|
||||
id="linearGradient1220"><stop
|
||||
id="stop1216"
|
||||
offset="0"
|
||||
style="stop-color:#98bf00;stop-opacity:1;" /><stop
|
||||
id="stop1218"
|
||||
offset="1"
|
||||
style="stop-color:#98bf00;stop-opacity:0.51" /></linearGradient><linearGradient
|
||||
x1="0"
|
||||
y1="0"
|
||||
x2="1"
|
||||
y2="0"
|
||||
gradientUnits="userSpaceOnUse"
|
||||
gradientTransform="matrix(-139.45511,-135.52185,-135.52185,139.45511,177.4727,131.75308)"
|
||||
spreadMethod="pad"
|
||||
id="linearGradient28"><stop
|
||||
style="stop-opacity:1;stop-color:#00afbc"
|
||||
offset="0"
|
||||
id="stop24" /><stop
|
||||
style="stop-opacity:1;stop-color:#205374"
|
||||
offset="1"
|
||||
id="stop26" /></linearGradient><clipPath
|
||||
clipPathUnits="userSpaceOnUse"
|
||||
id="clipPath38"><path
|
||||
d="M 0,127.984 H 415.474 V 0 H 0 Z"
|
||||
id="path36" /></clipPath><linearGradient
|
||||
xlink:href="#linearGradient1220"
|
||||
id="linearGradient947"
|
||||
gradientUnits="userSpaceOnUse"
|
||||
x1="14.915152"
|
||||
y1="14.167241"
|
||||
x2="214.11908"
|
||||
y2="111.76186"
|
||||
gradientTransform="matrix(4.4444443,0,0,-4.4444443,-33.008887,535.8)" /><clipPath
|
||||
clipPathUnits="userSpaceOnUse"
|
||||
id="clipPath38-9"><path
|
||||
d="M 0,127.984 H 415.474 V 0 H 0 Z"
|
||||
id="path36-1" /></clipPath></defs><g
|
||||
id="g10"
|
||||
transform="matrix(1.3333333,0,0,-1.3333333,-9.9026662,160.74)"><g
|
||||
id="g40"
|
||||
transform="translate(175.9982,95.8645)" /><g
|
||||
id="g44"
|
||||
transform="translate(152.1193,64.9934)" />
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<g
|
||||
id="NGI0Entrust"><title
|
||||
id="title12661">NGI Zero Entrust</title><path
|
||||
id="path7692"
|
||||
style="fill:#ffffff;fill-opacity:1;stroke:none;stroke-width:0.999999"
|
||||
d="m 133.10651,96.933602 c -6.67899,0 -12.68988,-1.41201 -18.02988,-4.23501 -5.344,-2.822 -9.51678,-6.73803 -12.52178,-11.74702 -3.004994,-5.008 -4.507906,-10.66967 -4.507906,-16.982669 0,-6.314995 1.502912,-11.974991 4.507906,-16.983985 3.005,-5.008995 7.14794,-8.924024 12.42993,-11.747021 5.282,-2.823998 11.23084,-4.23501 17.84883,-4.23501 4.613,0 9.19693,0.698875 13.75093,2.094873 0.045,0.014 0.0912,0.02819 0.13623,0.04219 7.10399,2.201999 11.88413,8.859686 11.88413,16.29668 v 9.047022 c 0,3.581996 -2.90333,6.485889 -6.48633,6.485889 h -0.50581 c -0.064,0 -0.12704,-0.0077 -0.19204,-0.0097 -0.064,0.002 -0.12704,0.0097 -0.19204,0.0097 h -7.28306 c -3.92899,0 -7.35908,-2.964914 -7.61308,-6.884912 -0.278,-4.295996 3.12428,-7.86709 7.36128,-7.86709 0.776,0 1.34293,-0.753702 1.11093,-1.493702 -0.65799,-2.087998 -2.34102,-3.751009 -4.54702,-4.333008 -2.07399,-0.546999 -4.27598,-0.820898 -6.60498,-0.820898 -4.00699,0 -7.57381,0.864972 -10.6998,2.594971 -3.127,1.729999 -5.5704,4.143993 -7.3314,7.23999 -1.761,3.095997 -2.64067,6.617018 -2.64067,10.564014 0,4.005996 0.87967,7.557666 2.64067,10.653656 1.761,3.097 4.2191,5.49317 7.3771,7.19517 3.156,1.698 6.76804,2.54883 10.83604,2.54883 4.68099,0 8.8649,-1.26899 12.5499,-3.80699 2.341,-1.61199 5.52423,-1.58761 7.75723,0.17139 3.47999,2.741 3.2889,8.04495 -0.31509,10.45196 -1.7,1.13599 -3.53807,2.11163 -5.51206,2.92763 -4.553,1.881 -9.62316,2.82305 -15.20816,2.82305 z m -93.706345,-1.09248 c -4.022996,0 -7.284815,-3.26081 -7.284815,-7.28482 v -49.17612 c 0,-4.022993 3.261819,-7.284815 7.284815,-7.284815 4.023996,0 7.284814,3.261822 7.284814,7.284815 V 62.34029 c 0,2.842996 3.564362,4.118722 5.36836,1.921728 L 76.282148,34.757135 c 1.383999,-1.685 3.450155,-2.661768 5.631153,-2.661768 h 1.380761 c 4.023997,0 7.286133,3.261822 7.286133,7.284815 v 49.17612 c 0,4.02401 -3.262136,7.28482 -7.286133,7.28482 -4.023995,0 -7.284815,-3.26081 -7.284815,-7.28482 V 65.615095 c 0,-2.844997 -3.568118,-4.119773 -5.370117,-1.917774 L 
46.503925,93.172322 c -1.382997,1.69 -3.45199,2.6688 -5.635987,2.6688 z m 136.597415,-4.4e-4 c -4.074,0 -7.37578,-3.30178 -7.37578,-7.37578 V 39.472027 c 0,-4.073996 3.30178,-7.37622 7.37578,-7.37622 4.074,0 7.37622,3.302224 7.37622,7.37622 v 48.992875 c 0,4.074 -3.30222,7.37578 -7.37622,7.37578 z" /><path
|
||||
id="path30"
|
||||
style="fill:url(#linearGradient947);fill-opacity:1;stroke:none;stroke-width:4.44444"
|
||||
d="M 79.115234 30 C 52.097457 30 30 52.101902 30 79.115234 L 30 423.66211 C 30 450.67989 52.097457 472.77734 79.115234 472.77734 L 812.60352 472.77734 C 839.61685 472.77734 861.7207 450.67544 861.7207 423.66211 L 861.7207 342.50586 C 861.7207 333.51919 865.28844 324.89711 871.64844 318.53711 L 912.07617 278.11133 C 923.36506 266.82688 923.33313 248.52428 912.01758 237.27539 L 871.7207 197.19922 C 865.3207 190.83922 861.7207 182.18238 861.7207 173.16016 L 861.7207 79.115234 C 861.7207 52.101902 839.61685 30 812.60352 30 L 79.115234 30 z M 558.57812 104.87891 C 583.40035 104.87891 605.93437 109.06578 626.16992 117.42578 C 634.94325 121.05245 643.11241 125.38861 650.66797 130.4375 C 666.68575 141.13528 667.53503 164.7084 652.06836 176.89062 C 642.14392 184.7084 627.99624 184.81679 617.5918 177.65234 C 601.21402 166.37234 582.6189 160.73242 561.81445 160.73242 C 543.73445 160.73242 527.68096 164.51388 513.6543 172.06055 C 499.61874 179.62499 488.69385 190.27462 480.86719 204.03906 C 473.04052 217.79906 469.13086 233.58423 469.13086 251.38867 C 469.13086 268.93089 473.04052 284.57984 480.86719 298.33984 C 488.69385 312.09984 499.55339 322.82869 513.45117 330.51758 C 527.3445 338.20647 543.19697 342.05078 561.00586 342.05078 C 571.35697 342.05078 581.14355 340.83345 590.36133 338.40234 C 600.16577 335.81568 607.64587 328.42453 610.57031 319.14453 C 611.60142 315.85564 609.0817 312.50586 605.63281 312.50586 C 586.8017 312.50586 571.68046 296.63435 572.91602 277.54102 C 574.0449 260.11879 589.28973 246.94141 606.75195 246.94141 L 639.12109 246.94141 C 639.40998 246.94141 639.69016 246.97549 639.97461 246.98438 C 640.2635 246.97549 640.54368 246.94141 640.82812 246.94141 L 643.07617 246.94141 C 659.00062 246.94141 671.9043 259.84758 671.9043 275.76758 L 671.9043 315.97656 C 671.9043 349.0299 650.65927 378.61958 619.08594 388.40625 C 618.88594 388.46847 618.68047 388.53153 618.48047 388.59375 C 598.24047 394.79819 577.86746 397.9043 557.36523 397.9043 C 527.9519 397.9043 
501.51266 391.63314 478.03711 379.08203 C 454.56155 366.53536 436.14852 349.13527 422.79297 326.87305 C 409.43741 304.61083 402.75781 279.45534 402.75781 251.38867 C 402.75781 223.33089 409.43741 198.16793 422.79297 175.91016 C 436.14852 153.64793 454.6942 136.24339 478.44531 123.70117 C 502.17865 111.15451 528.89368 104.87891 558.57812 104.87891 z M 142.10547 109.73438 L 148.62891 109.73438 C 158.33557 109.73438 167.53107 114.08459 173.67773 121.5957 L 280.94531 252.5957 C 288.9542 262.38237 304.8125 256.71671 304.8125 244.07227 L 304.8125 142.11133 C 304.8125 124.22688 319.30501 109.73438 337.18945 109.73438 C 355.0739 109.73438 369.57227 124.22688 369.57227 142.11133 L 369.57227 360.67188 C 369.57227 378.55187 355.0739 393.04883 337.18945 393.04883 L 331.05273 393.04883 C 321.3594 393.04883 312.1765 388.70764 306.02539 381.21875 L 198.3418 250.08594 C 190.32402 240.32149 174.48242 245.9914 174.48242 258.62695 L 174.48242 360.67188 C 174.48242 378.55187 159.98991 393.04883 142.10547 393.04883 C 124.22547 393.04883 109.72852 378.55187 109.72852 360.67188 L 109.72852 142.11133 C 109.72852 124.22688 124.22547 109.73438 142.10547 109.73438 z M 749.20508 109.73633 C 767.31174 109.73633 781.98828 124.41091 781.98828 142.51758 L 781.98828 360.26367 C 781.98828 378.37034 767.31174 393.04688 749.20508 393.04688 C 731.09841 393.04688 716.42383 378.37034 716.42383 360.26367 L 716.42383 142.51758 C 716.42383 124.41091 731.09841 109.73633 749.20508 109.73633 z "
|
||||
transform="matrix(0.22500001,0,0,-0.22500001,7.4269998,120.555)" /><g
|
||||
aria-label="Z E R O"
|
||||
transform="scale(1,-1)"
|
||||
id="text56"
|
||||
style="font-weight:600;font-size:31.76px;font-family:'Montserrat SemiBold';-inkscape-font-specification:Montserrat-SemiBold;fill:#6f9aa8"><path
|
||||
d="m 261.75384,-85.665085 -13.08512,15.97528 h 13.498 v 3.4936 H 243.206 v -2.76312 l 13.08512,-15.97528 h -12.8628 v -3.4936 h 18.32552 z"
|
||||
id="path12603" /><path
|
||||
d="m 278.84063,-75.787725 v 6.12968 h 12.5452 v 3.46184 h -16.674 v -22.232 h 16.22936 v 3.46184 h -12.10056 v 5.78032 h 10.73488 v 3.39832 z"
|
||||
id="path12605" /><path
|
||||
d="m 323.74919,-66.196205 h -4.4464 l -4.54168,-6.5108 q -0.28584,0.03176 -0.85752,0.03176 h -5.01808 v 6.47904 h -4.1288 v -22.232 h 9.14688 q 2.89016,0 5.01808,0.9528 2.15968,0.9528 3.30304,2.73136 1.14336,1.77856 1.14336,4.22408 0,2.50904 -1.23864,4.31936 -1.20688,1.81032 -3.4936,2.6996 z m -4.54168,-14.32376 q 0,-2.12792 -1.39744,-3.27128 -1.39744,-1.14336 -4.09704,-1.14336 h -4.82752 v 8.86104 h 4.82752 q 2.6996,0 4.09704,-1.14336 1.39744,-1.17512 1.39744,-3.30304 z"
|
||||
id="path12607" /><path
|
||||
d="m 347.12448,-65.878605 q -3.39832,0 -6.12968,-1.46096 -2.73136,-1.49272 -4.2876,-4.09704 -1.55624,-2.63608 -1.55624,-5.8756 0,-3.23952 1.55624,-5.84384 1.55624,-2.63608 4.2876,-4.09704 2.73136,-1.49272 6.12968,-1.49272 3.39832,0 6.12968,1.49272 2.73136,1.46096 4.2876,4.06528 1.55624,2.60432 1.55624,5.8756 0,3.27128 -1.55624,5.8756 -1.55624,2.60432 -4.2876,4.09704 -2.73136,1.46096 -6.12968,1.46096 z m 0,-3.62064 q 2.2232,0 4.00176,-0.98456 1.77856,-1.01632 2.79488,-2.79488 1.01632,-1.81032 1.01632,-4.03352 0,-2.2232 -1.01632,-4.00176 -1.01632,-1.81032 -2.79488,-2.79488 -1.77856,-1.01632 -4.00176,-1.01632 -2.2232,0 -4.00176,1.01632 -1.77856,0.98456 -2.79488,2.79488 -1.01632,1.77856 -1.01632,4.00176 0,2.2232 1.01632,4.03352 1.01632,1.77856 2.79488,2.79488 1.77856,0.98456 4.00176,0.98456 z"
|
||||
id="path12609" /></g><g
|
||||
aria-label="ENTRUST"
|
||||
transform="scale(0.99994801,-1.000052)"
|
||||
id="Entrust"
|
||||
style="font-weight:bold;font-size:20.009px;font-family:'Montserrat SemiBold';-inkscape-font-specification:'Montserrat SemiBold, Bold';letter-spacing:3.55932px;fill:#6f9aa8;stroke-width:0.999947"><path
|
||||
d="m 245.81989,-41.935548 v 3.861737 h 7.90356 v 2.180981 h -10.50473 v -14.0063 h 10.2246 v 2.180981 h -7.62343 v 3.641638 h 6.76304 v 2.140963 z"
|
||||
id="path12612" /><path
|
||||
d="m 270.04847,-40.414864 v -9.484266 h 2.58116 v 14.0063 h -2.14096 l -7.72347,-9.484266 v 9.484266 h -2.58117 v -14.0063 h 2.14097 z"
|
||||
id="path12614" /><path
|
||||
d="m 285.39308,-35.89283 h -2.60117 v -11.80531 h -4.64209 v -2.20099 h 11.88535 v 2.20099 h -4.64209 z"
|
||||
id="path12616" /><path
|
||||
d="m 307.52074,-35.89283 h -2.80126 l -2.86129,-4.101845 q -0.18008,0.02001 -0.54024,0.02001 h -3.16142 v 4.081836 h -2.60117 v -14.0063 h 5.76259 q 1.82082,0 3.16142,0.60027 1.36061,0.60027 2.08094,1.720774 0.72032,1.120504 0.72032,2.661197 0,1.580711 -0.78035,2.721224 -0.76034,1.140513 -2.20099,1.700765 z m -2.86129,-9.024059 q 0,-1.340603 -0.88039,-2.060927 -0.8804,-0.720324 -2.58116,-0.720324 h -3.04137 v 5.582511 h 3.04137 q 1.70076,0 2.58116,-0.720324 0.88039,-0.740333 0.88039,-2.080936 z"
|
||||
id="path12618" /><path
|
||||
d="m 319.76395,-35.69274 q -2.90131,0 -4.52204,-1.620729 -1.62073,-1.640738 -1.62073,-4.682106 v -7.903555 h 2.60117 v 7.80351 q 0,4.121854 3.5616,4.121854 3.5416,0 3.5416,-4.121854 v -7.80351 h 2.56115 v 7.903555 q 0,3.041368 -1.62073,4.682106 -1.60072,1.620729 -4.50202,1.620729 z"
|
||||
id="path12620" /><path
|
||||
d="m 337.4296,-35.69274 q -1.62073,0 -3.14141,-0.460207 -1.50068,-0.460207 -2.38107,-1.220549 l 0.9004,-2.020909 q 0.86039,0.680306 2.10095,1.120504 1.26056,0.420189 2.52113,0.420189 1.5607,0 2.32105,-0.500225 0.78035,-0.500225 0.78035,-1.320594 0,-0.60027 -0.4402,-0.980441 -0.42019,-0.40018 -1.08049,-0.620279 -0.66029,-0.220099 -1.80081,-0.500225 -1.60072,-0.380171 -2.60117,-0.760342 -0.98044,-0.380171 -1.70076,-1.180531 -0.70032,-0.820369 -0.70032,-2.20099 0,-1.160522 0.62028,-2.100945 0.64029,-0.960432 1.90086,-1.520684 1.28057,-0.560252 3.1214,-0.560252 1.28058,0 2.52113,0.320144 1.24056,0.320144 2.14097,0.920414 l -0.82037,2.020909 q -0.92042,-0.540243 -1.92087,-0.820369 -1.00045,-0.280126 -1.94087,-0.280126 -1.54069,0 -2.30103,0.520234 -0.74034,0.520234 -0.74034,1.380621 0,0.60027 0.42019,0.980441 0.4402,0.380171 1.1005,0.60027 0.66029,0.220099 1.80081,0.500225 1.5607,0.360162 2.56115,0.760342 1.00045,0.380171 1.70076,1.180531 0.72033,0.80036 0.72033,2.160972 0,1.160522 -0.64029,2.100945 -0.62028,0.940423 -1.90085,1.500675 -1.28058,0.560252 -3.12141,0.560252 z"
|
||||
id="path12622" /><path
|
||||
d="m 354.47498,-35.89283 h -2.60117 v -11.80531 h -4.64209 v -2.20099 h 11.88535 v 2.20099 h -4.64209 z"
|
||||
id="path12624" /></g></g>
|
||||
|
||||
|
||||
|
||||
<text
|
||||
style="font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;font-size:20.01px;font-family:'Montserrat SemiBold';-inkscape-font-specification:'Montserrat SemiBold, Bold';font-variant-ligatures:normal;font-variant-caps:normal;font-variant-numeric:normal;font-feature-settings:normal;text-align:start;writing-mode:lr-tb;text-anchor:start;fill:#6f9aa8;fill-opacity:1;fill-rule:nonzero;stroke:none;stroke-width:1"
|
||||
id="text2843"
|
||||
x="240.16206"
|
||||
y="-35.894695"
|
||||
transform="scale(1,-1)"><tspan
|
||||
id="tspan2841"
|
||||
x="240.16206"
|
||||
y="-35.894695" /></text></g></svg>
|
After Width: | Height: | Size: 14 KiB |
106
README.md
106
README.md
@@ -1,40 +1,104 @@
|
||||
# marginalia.nu
|
||||
# Marginalia Search
|
||||
|
||||
This is the source code for marginalia.nu, including the [search engine](https://search.marginalia.nu),
|
||||
the [MEMEX/gemini server](https://memex.marginalia.nu), and the [encyclopedia service](https://encyclopedia.marginalia.nu).
|
||||
This is the source code for [Marginalia Search](https://search.marginalia.nu).
|
||||
|
||||
The aim of the project is to develop new and alternative discovery methods for the Internet.
|
||||
It's an experimental workshop as much as it is a public service; the overarching goal is to
|
||||
elevate the more human, non-commercial sides of the Internet. A side-goal is to do this without
|
||||
requiring datacenters and expensive enterprise hardware, to run this operation on affordable hardware.
|
||||
elevate the more human, non-commercial sides of the Internet.
|
||||
|
||||
The canonical git server for this project is [https://git.marginalia.nu](https://git.marginalia.nu).
|
||||
It is fine to mirror it on other hosts, but if you have issues or questions
|
||||
git.marginalia.nu is where you want to go.
|
||||
A side-goal is to do this without requiring datacenters and enterprise hardware budgets,
|
||||
to be able to run this operation on affordable hardware with minimal operational overhead.
|
||||
|
||||
## Important note about wmsa.local
|
||||
The long-term plan is to refine the search engine so that it provides enough public value
|
||||
that the project can be funded through grants, donations and commercial API licenses
|
||||
(non-commercial share-alike is always free).
|
||||
|
||||
This project has a [sister repository called wmsa.local](https://git.marginalia.nu/marginalia/wmsa.local)
|
||||
that contains scripts and configuration files for running and developing the code.
|
||||
The system can both be run as a copy of Marginalia Search, or as a white-label search engine
|
||||
for your own data (either crawled or side-loaded). At present the logic isn't very configurable, and a lot of the judgements
|
||||
made are based on the Marginalia project's goals, but additional configurability is being
|
||||
worked on!
|
||||
|
||||
Without it, development is very unpleasant.
|
||||
Here's a demo of the set-up and operation of the self-hostable barebones mode of the search engine: [🌎 https://www.youtube.com/watch?v=PNwMkenQQ24](https://www.youtube.com/watch?v=PNwMkenQQ24)
|
||||
|
||||
While developing the code, you will want an environment variable WMSA_HOME pointing to
|
||||
the directory in which wmsa.local is checked out, otherwise the code will not run and
|
||||
several tests will fail.
|
||||
## Set up
|
||||
|
||||
## Documentation
|
||||
To set up a local test environment, follow the instructions in [📄 run/readme.md](run/readme.md)!
|
||||
|
||||
Documentation is a work in progress. See the [wiki](https://git.marginalia.nu/marginalia/marginalia.nu/wiki).
|
||||
Further documentation is available at [🌎 https://docs.marginalia.nu/](https://docs.marginalia.nu/).
|
||||
|
||||
## Contributing
|
||||
Before compiling, it's necessary to run [⚙️ run/setup.sh](run/setup.sh).
|
||||
This will download supplementary model data that is necessary to run the code.
|
||||
These are also necessary to run the tests.
|
||||
|
||||
[CONTRIBUTING.md](CONTRIBUTING.md)
|
||||
If you wish to hack on the code, check out [📄 doc/ide-configuration.md](doc/ide-configuration.md).
|
||||
|
||||
## Supporting
|
||||
## Hardware Requirements
|
||||
|
||||
Consider [supporting this project](https://memex.marginalia.nu/projects/edge/supporting.gmi).
|
||||
A production-like environment requires a lot of RAM and ideally enterprise SSDs for
|
||||
the index, as well as some additional terabytes of slower harddrives for storing crawl
|
||||
data. It can be made to run on smaller hardware by limiting the size of the index.
|
||||
|
||||
The system will definitely run on a 32 GB machine, possibly smaller, but at that size it may not perform
|
||||
very well as it relies on disk caching to be fast.
|
||||
|
||||
A local developer's deployment is possible with much smaller hardware (and index size).
|
||||
|
||||
## Project Structure
|
||||
|
||||
[📁 code/](code/) - The Source Code. See [📄 code/readme.md](code/readme.md) for a further breakdown of the structure and architecture.
|
||||
|
||||
[📁 run/](run/) - Scripts and files used to run the search engine locally
|
||||
|
||||
[📁 third-party/](third-party/) - Third party code
|
||||
|
||||
[📁 doc/](doc/) - Supplementary documentation
|
||||
|
||||
[📄 CONTRIBUTING.md](CONTRIBUTING.md) - How to contribute
|
||||
|
||||
[📄 LICENSE.md](LICENSE.md) - License terms
|
||||
|
||||
## Contact
|
||||
|
||||
You can email <kontakt@marginalia.nu> with any questions or feedback.
|
||||
|
||||
## License
|
||||
|
||||
The bulk of the project is available with AGPL 3.0, with exceptions. Some parts are co-licensed under MIT,
|
||||
third party code may have different licenses. See the appropriate readme.md / license.md.
|
||||
|
||||
## Versioning
|
||||
|
||||
The project uses modified Calendar Versioning, where the first two pairs of numbers are a year and month coinciding
|
||||
with the latest crawling operation, and the third number is a patch number.
|
||||
|
||||
```
|
||||
version
|
||||
--
|
||||
yy.mm.VV
|
||||
-----
|
||||
crawl
|
||||
```
|
||||
|
||||
For example, `23.03.02` is a release with crawl data from March 2023 (released in May 2023).
|
||||
It is the second patch for the 23.03 release.
|
||||
|
||||
Versions with the same year and month are compatible with each other, or offer an upgrade path where the same
|
||||
data set can be used, but across different crawl sets data format changes may be introduced, and you're generally
|
||||
expected to re-crawl the data from scratch as crawler data has a shelf life approximately as long as the major release
|
||||
cycles of this project. After about 2-3 months it gets noticeably stale with many dead links.
|
||||
|
||||
For development purposes, crawling is discouraged and sample data is available. See [📄 run/readme.md](run/readme.md)
|
||||
for more information.
|
||||
|
||||
## Funding
|
||||
|
||||
### Donations
|
||||
|
||||
Consider [donating to the project](https://www.marginalia.nu/marginalia-search/supporting/).
|
||||
|
||||
### Grants
|
||||
|
||||
This project was funded through the [NGI0 Entrust Fund](https://nlnet.nl/entrust), a fund established by [NLnet](https://nlnet.nl) with financial support from the European Commission's [Next Generation Internet](https://ngi.eu/) programme, under the aegis of DG Communications Networks, Content and Technology under grant agreement No 101069594.
|
||||
|
||||

|
||||

|
95
ROADMAP.md
Normal file
95
ROADMAP.md
Normal file
@@ -0,0 +1,95 @@
|
||||
# Roadmap 2025
|
||||
|
||||
This is a roadmap with major features planned for Marginalia Search.
|
||||
|
||||
It's not set in any particular order and other features will definitely
|
||||
be implemented as well.
|
||||
|
||||
Major goals:
|
||||
|
||||
* Reach 1 billion pages indexed
|
||||
|
||||
|
||||
* Improve technical ability of indexing and search. ~~Although this area has improved a bit, the
|
||||
search engine is still not very good at dealing with longer queries.~~ (As of PR [#129](https://github.com/MarginaliaSearch/MarginaliaSearch/pull/129), this has improved significantly. There is still more work to be done.)
|
||||
|
||||
## Hybridize crawler w/ Common Crawl data
|
||||
|
||||
Sometimes Marginalia's relatively obscure crawler is blocked when attempting to crawl a website, or for
|
||||
other technical reasons it may be prevented from doing so. A possible work-around is to hybridize the
|
||||
crawler so that it attempts to fetch such inaccessible websites from Common Crawl. This is an important
|
||||
step on the road to 1 billion pages indexed.
|
||||
|
||||
As a rough sketch, the crawler would identify target websites, consume CC's index, and then fetch the WARC data
|
||||
with byte range queries.
|
||||
|
||||
Retaining the ability to independently crawl the web is still strongly desirable so going full CC is not an option.
|
||||
|
||||
## Safe Search
|
||||
|
||||
The search engine has a bit of a problem showing spicy content mixed in with the results. It would be desirable to have a way to filter this out. It's likely something like a URL blacklist (e.g. [UT1](https://dsi.ut-capitole.fr/blacklists/index_en.php))
|
||||
combined with a naive Bayesian filter would go a long way, or something more sophisticated...?
|
||||
|
||||
## Additional Language Support
|
||||
|
||||
It would be desirable if the search engine supported more languages than English. This is partially about
|
||||
rooting out assumptions regarding character encoding, but there's most likely some amount of custom logic
|
||||
associated with each language added, at least a models file or two, as well as some fine tuning.
|
||||
|
||||
It would be very helpful to find a speaker of a large language other than English to help in the fine tuning.
|
||||
|
||||
## Support for binary formats like PDF
|
||||
|
||||
The crawler needs to be modified to retain them, and the conversion logic needs to parse them.
|
||||
The documents database probably should have some sort of flag indicating it's a PDF as well.
|
||||
|
||||
PDF parsing is known to be a bit of a security liability so some thought needs to be put in
|
||||
that direction as well.
|
||||
|
||||
## Custom ranking logic
|
||||
|
||||
Stract does an interesting thing where they have configurable search filters.
|
||||
|
||||
This looks like a good idea that wouldn't just help clean up the search filters on the main
|
||||
website, but might be cheap enough we might go as far as to offer a number of ad-hoc custom search
|
||||
filters for any API consumer.
|
||||
|
||||
I've talked to the Stract dev and he does not think it's a good idea to mimic their optics language, which is quite ad-hoc, but instead to work together to find some new common description language for this.
|
||||
|
||||
## Show favicons next to search results
|
||||
|
||||
This is expected from search engines. A basic proof-of-concept sketch of fetching this data has been done, but the feature is some way from being a reality.
|
||||
|
||||
## Specialized crawler for GitHub
|
||||
|
||||
One of the search engine's biggest limitations right now is that it does not index GitHub at all. A specialized crawler that fetches at least the readme.md would go a long way toward providing search capabilities in this domain.
|
||||
|
||||
# Completed
|
||||
|
||||
## Web Design Overhaul (COMPLETED 2025-01)
|
||||
|
||||
The design is kinda clunky and hard to maintain, and needlessly outdated-looking.
|
||||
|
||||
PR [#127](https://github.com/MarginaliaSearch/MarginaliaSearch/pull/127)
|
||||
|
||||
## Finalize RSS support (COMPLETED 2024-11)
|
||||
|
||||
Marginalia has experimental RSS preview support for a few domains. This works well and
|
||||
it should be extended to all domains. It would also be interesting to offer search of the
|
||||
RSS data itself, or use the RSS set to feed a special live index that updates faster than the
|
||||
main dataset.
|
||||
|
||||
Completed with PR [#122](https://github.com/MarginaliaSearch/MarginaliaSearch/pull/122) and PR [#125](https://github.com/MarginaliaSearch/MarginaliaSearch/pull/125)
|
||||
|
||||
## Proper Position Index (COMPLETED 2024-09)
|
||||
|
||||
The search engine uses a fixed width bit mask to indicate word positions. It has the benefit
|
||||
of being very fast to evaluate and works well for what it is, but is inaccurate and has the
|
||||
drawback of making support for quoted search terms inaccurate and largely reliant on indexing
|
||||
word n-grams known beforehand. This limits the ability to interpret longer queries.
|
||||
|
||||
The positions mask should be supplemented or replaced with a more accurate (e.g.) gamma coded positions
|
||||
list, as is the civilized way of doing this.
|
||||
|
||||
Completed with PR [#99](https://github.com/MarginaliaSearch/MarginaliaSearch/pull/99)
|
||||
|
100
build.gradle
100
build.gradle
@@ -1,62 +1,74 @@
|
||||
plugins {
|
||||
id 'java'
|
||||
id("org.jetbrains.gradle.plugin.idea-ext") version "1.0"
|
||||
id "me.champeau.jmh" version "0.6.6"
|
||||
|
||||
id 'com.github.johnrengelman.shadow' version '6.0.0'
|
||||
// This is a workaround for a bug in the Jib plugin that causes it to stall randomly
|
||||
// https://github.com/GoogleContainerTools/jib/issues/3347
|
||||
id 'com.google.cloud.tools.jib' version '3.4.5' apply(false)
|
||||
}
|
||||
|
||||
group 'nu.marginalia'
|
||||
group 'marginalia'
|
||||
version 'SNAPSHOT'
|
||||
|
||||
compileJava.options.encoding = "UTF-8"
|
||||
compileTestJava.options.encoding = "UTF-8"
|
||||
repositories {
|
||||
mavenLocal()
|
||||
maven { url "https://artifactory.cronapp.io/public-release/" }
|
||||
maven { url "https://repo1.maven.org/maven2/" }
|
||||
maven { url "https://www2.ph.ed.ac.uk/maven2/" }
|
||||
maven { url "https://jitpack.io/" }
|
||||
exclusiveContent {
|
||||
forRepository {
|
||||
maven {
|
||||
url = uri("https://jitpack.io")
|
||||
}
|
||||
}
|
||||
filter {
|
||||
// Only use JitPack for the `gson-record-type-adapter-factory` library
|
||||
includeModule("com.github.Marcono1234", "gson-record-type-adapter-factory")
|
||||
}
|
||||
|
||||
subprojects.forEach {it ->
|
||||
// Enable preview features for the entire project
|
||||
|
||||
if (it.path.contains(':code:')) {
|
||||
sourceSets.main.java.srcDirs += file('java')
|
||||
sourceSets.main.resources.srcDirs += file('resources')
|
||||
sourceSets.test.java.srcDirs += file('test')
|
||||
sourceSets.test.resources.srcDirs += file('test-resources')
|
||||
}
|
||||
|
||||
it.tasks.withType(JavaCompile).configureEach {
|
||||
options.compilerArgs += ['--enable-preview']
|
||||
}
|
||||
it.tasks.withType(JavaExec).configureEach {
|
||||
jvmArgs += ['--enable-preview']
|
||||
}
|
||||
it.tasks.withType(Test).configureEach {
|
||||
jvmArgs += ['--enable-preview']
|
||||
}
|
||||
|
||||
// Enable reproducible builds for the entire project
|
||||
it.tasks.withType(AbstractArchiveTask).configureEach {
|
||||
preserveFileTimestamps = false
|
||||
reproducibleFileOrder = true
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
shadowJar {
|
||||
zip64 true
|
||||
}
|
||||
jar {
|
||||
manifest {
|
||||
attributes 'Main-Class': "nu.marginalia.wmsa.configuration.ServiceDescriptor"
|
||||
}
|
||||
from {
|
||||
configurations.shadow.collect { it.isDirectory() ? it : zipTree(it) }
|
||||
}
|
||||
ext {
|
||||
jvmVersion = 24
|
||||
dockerImageBase='container-registry.oracle.com/graalvm/jdk:24'
|
||||
dockerImageTag='latest'
|
||||
dockerImageRegistry='marginalia'
|
||||
jibVersion = '3.4.5'
|
||||
}
|
||||
|
||||
idea {
|
||||
module {
|
||||
// Exclude these directories from being indexed by IntelliJ
|
||||
// as they tend to bring the IDE to its knees and use up all
|
||||
// Inotify spots in a hurry
|
||||
excludeDirs.add(file("$projectDir/run/node-1"))
|
||||
excludeDirs.add(file("$projectDir/run/node-2"))
|
||||
excludeDirs.add(file("$projectDir/run/model"))
|
||||
excludeDirs.add(file("$projectDir/run/dist"))
|
||||
excludeDirs.add(file("$projectDir/run/db"))
|
||||
excludeDirs.add(file("$projectDir/run/logs"))
|
||||
excludeDirs.add(file("$projectDir/run/data"))
|
||||
excludeDirs.add(file("$projectDir/run/conf"))
|
||||
excludeDirs.add(file("$projectDir/run/test-data"))
|
||||
}
|
||||
}
|
||||
java {
|
||||
toolchain {
|
||||
languageVersion.set(JavaLanguageVersion.of(17))
|
||||
}
|
||||
}
|
||||
|
||||
dependencies {
|
||||
implementation project(':marginalia_nu')
|
||||
}
|
||||
task version() { //
|
||||
}
|
||||
|
||||
test {
|
||||
maxParallelForks = 16
|
||||
forkEvery = 1
|
||||
maxHeapSize = "8G"
|
||||
useJUnitPlatform {
|
||||
excludeTags "nobuild"
|
||||
languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
|
||||
}
|
||||
}
|
||||
|
||||
|
41
code/common/config/build.gradle
Normal file
41
code/common/config/build.gradle
Normal file
@@ -0,0 +1,41 @@
|
||||
// Build script for the :code:common:config module.
plugins {
    id 'java'

    id 'jvm-test-suite'
}

java {
    toolchain {
        // The JVM version is defined once on the root project's `ext` block
        languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
    }
}

apply from: "$rootProject.projectDir/srcsets.gradle"

dependencies {
    implementation project(':code:common:db')
    implementation project(':code:common:model')

    implementation libs.bundles.slf4j
    implementation libs.bundles.mariadb
    implementation libs.mockito
    implementation libs.guava
    // Guice is added with its com.google.guava dependencies excluded;
    // Guava is declared directly via libs.guava above.
    implementation dependencies.create(libs.guice.get()) {
        exclude group: 'com.google.guava'
    }
    implementation libs.gson

    testImplementation libs.bundles.slf4j.test
    testImplementation libs.bundles.junit

    // Declared once; the original script listed this project dependency twice.
    testImplementation project(':code:libraries:test-helpers')

    testImplementation platform('org.testcontainers:testcontainers-bom:1.17.4')
    testImplementation libs.commons.codec
    testImplementation 'org.testcontainers:mariadb:1.17.4'
    testImplementation 'org.testcontainers:junit-jupiter:1.17.4'
}
|
67
code/common/config/java/nu/marginalia/IndexLocations.java
Normal file
67
code/common/config/java/nu/marginalia/IndexLocations.java
Normal file
@@ -0,0 +1,67 @@
|
||||
package nu.marginalia;
|
||||
|
||||
import nu.marginalia.storage.FileStorageService;
|
||||
import nu.marginalia.storage.model.FileStorageBaseType;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.sql.SQLException;
|
||||
|
||||
/** The IndexLocations class is responsible for knowledge about the locations
|
||||
* of various important system paths. The methods take a FileStorageService,
|
||||
* as these paths are node-dependent.
|
||||
*/
|
||||
public class IndexLocations {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(IndexLocations.class);
|
||||
/** Return the path to the current link database */
|
||||
public static Path getLinkdbLivePath(FileStorageService fileStorage) {
|
||||
return getStorage(fileStorage, FileStorageBaseType.CURRENT, "ldbr");
|
||||
}
|
||||
|
||||
/** Return the path to the next link database */
|
||||
public static Path getLinkdbWritePath(FileStorageService fileStorage) {
|
||||
return getStorage(fileStorage, FileStorageBaseType.CURRENT, "ldbw");
|
||||
}
|
||||
|
||||
/** Return the path to the current live index */
|
||||
public static Path getCurrentIndex(FileStorageService fileStorage) {
|
||||
return getStorage(fileStorage, FileStorageBaseType.CURRENT, "ir");
|
||||
}
|
||||
|
||||
/** Return the path to the designated index construction area */
|
||||
public static Path getIndexConstructionArea(FileStorageService fileStorage) {
|
||||
return getStorage(fileStorage, FileStorageBaseType.CURRENT, "iw");
|
||||
}
|
||||
|
||||
/** Return the path to the search sets */
|
||||
public static Path getSearchSetsPath(FileStorageService fileStorage) {
|
||||
return getStorage(fileStorage, FileStorageBaseType.CURRENT, "ss");
|
||||
}
|
||||
|
||||
private static Path getStorage(FileStorageService service, FileStorageBaseType baseType, String pathPart) {
|
||||
try {
|
||||
var base = service.getStorageBase(baseType);
|
||||
if (base == null) {
|
||||
throw new IllegalStateException("File storage base " + baseType + " is not configured!");
|
||||
}
|
||||
|
||||
// Ensure the directory exists
|
||||
Path ret = base.asPath().resolve(pathPart);
|
||||
if (!Files.exists(ret)) {
|
||||
logger.info("Creating system directory {}", ret);
|
||||
|
||||
Files.createDirectories(ret);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
catch (SQLException | IOException ex) {
|
||||
throw new IllegalStateException("Error fetching storage " + baseType + " / " + pathPart, ex);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
27
code/common/config/java/nu/marginalia/LanguageModels.java
Normal file
27
code/common/config/java/nu/marginalia/LanguageModels.java
Normal file
@@ -0,0 +1,27 @@
|
||||
package nu.marginalia;
|
||||
|
||||
import java.nio.file.Path;
|
||||
|
||||
public class LanguageModels {
|
||||
public final Path termFrequencies;
|
||||
|
||||
public final Path openNLPSentenceDetectionData;
|
||||
public final Path posRules;
|
||||
public final Path posDict;
|
||||
public final Path fasttextLanguageModel;
|
||||
public final Path segments;
|
||||
|
||||
public LanguageModels(Path termFrequencies,
|
||||
Path openNLPSentenceDetectionData,
|
||||
Path posRules,
|
||||
Path posDict,
|
||||
Path fasttextLanguageModel,
|
||||
Path segments) {
|
||||
this.termFrequencies = termFrequencies;
|
||||
this.openNLPSentenceDetectionData = openNLPSentenceDetectionData;
|
||||
this.posRules = posRules;
|
||||
this.posDict = posDict;
|
||||
this.fasttextLanguageModel = fasttextLanguageModel;
|
||||
this.segments = segments;
|
||||
}
|
||||
}
|
3
code/common/config/java/nu/marginalia/UserAgent.java
Normal file
3
code/common/config/java/nu/marginalia/UserAgent.java
Normal file
@@ -0,0 +1,3 @@
|
||||
package nu.marginalia;
|
||||
|
||||
/** Pairs a user agent string ({@code uaString}) with a user agent identifier ({@code uaIdentifier}). */
public record UserAgent(String uaString, String uaIdentifier) {}
|
7
code/common/config/java/nu/marginalia/WebsiteUrl.java
Normal file
7
code/common/config/java/nu/marginalia/WebsiteUrl.java
Normal file
@@ -0,0 +1,7 @@
|
||||
package nu.marginalia;
|
||||
|
||||
/** Holds the base URL of a website as a plain string. */
public record WebsiteUrl(String url) {

    /**
     * Appends {@code path} to the base URL verbatim. No separator is inserted
     * and nothing is normalized, so the caller decides where the slash goes.
     *
     * @param path the text to append to {@link #url()}
     * @return the concatenation of the base URL and {@code path}
     */
    public String withPath(String path) {
        final String combined = url + path;
        return combined;
    }
}
|
117
code/common/config/java/nu/marginalia/WmsaHome.java
Normal file
117
code/common/config/java/nu/marginalia/WmsaHome.java
Normal file
@@ -0,0 +1,117 @@
|
||||
package nu.marginalia;
|
||||
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
public class WmsaHome {
|
||||
public static UserAgent getUserAgent() {
|
||||
return new UserAgent(
|
||||
System.getProperty("crawler.userAgentString", "Mozilla/5.0 (compatible; Marginalia-like bot; +https://git.marginalia.nu/))"),
|
||||
System.getProperty("crawler.userAgentIdentifier", "search.marginalia.nu")
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
public static Path getUploadDir() {
|
||||
return Path.of(
|
||||
System.getProperty("executor.uploadDir", "/uploads")
|
||||
);
|
||||
}
|
||||
|
||||
public static Path getHomePath() {
|
||||
String[] possibleLocations = new String[] {
|
||||
System.getenv("WMSA_HOME"),
|
||||
System.getProperty("system.homePath"),
|
||||
"/var/lib/wmsa",
|
||||
"/wmsa"
|
||||
};
|
||||
|
||||
Optional<String> retStr = Stream.of(possibleLocations)
|
||||
.filter(Objects::nonNull)
|
||||
.map(Path::of)
|
||||
.filter(Files::isDirectory)
|
||||
.map(Path::toString)
|
||||
.findFirst();
|
||||
|
||||
if (retStr.isEmpty()) {
|
||||
// Check parent directories for a fingerprint of the project's installation boilerplate
|
||||
var prodRoot = Stream.iterate(Paths.get("").toAbsolutePath(), f -> f != null && Files.exists(f), Path::getParent)
|
||||
.filter(p -> Files.exists(p.resolve("conf/properties/system.properties")))
|
||||
.filter(p -> Files.exists(p.resolve("model/tfreq-new-algo3.bin")))
|
||||
.findAny();
|
||||
if (prodRoot.isPresent()) {
|
||||
return prodRoot.get();
|
||||
}
|
||||
|
||||
// Check if we are running in a test environment by looking for fingerprints
|
||||
// matching the base of the source tree for the project, then looking up the
|
||||
// run directory which contains a template for the installation we can use as
|
||||
// though it's the project root for testing purposes
|
||||
|
||||
var testRoot = Stream.iterate(Paths.get("").toAbsolutePath(), f -> f != null && Files.exists(f), Path::getParent)
|
||||
.filter(p -> Files.exists(p.resolve("run/env")))
|
||||
.filter(p -> Files.exists(p.resolve("run/setup.sh")))
|
||||
.map(p -> p.resolve("run"))
|
||||
.findAny();
|
||||
|
||||
return testRoot.orElseThrow(() -> new IllegalStateException("""
|
||||
Could not find $WMSA_HOME, either set environment
|
||||
variable, the 'system.homePath' java property,
|
||||
or ensure either /wmsa or /var/lib/wmsa exists
|
||||
"""));
|
||||
}
|
||||
|
||||
var ret = Path.of(retStr.get());
|
||||
|
||||
if (!Files.isDirectory(ret.resolve("model"))) {
|
||||
throw new IllegalStateException("You need to run 'run/setup.sh' to download models to run/ before this will work!");
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
public static Path getDataPath() {
|
||||
return getHomePath().resolve("data");
|
||||
}
|
||||
|
||||
public static Path getAdsDefinition() {
|
||||
return getHomePath().resolve("data").resolve("adblock.txt");
|
||||
}
|
||||
|
||||
public static Path getIPLocationDatabse() {
|
||||
return getHomePath().resolve("data").resolve("IP2LOCATION-LITE-DB1.CSV");
|
||||
|
||||
}
|
||||
|
||||
public static Path getAsnMappingDatabase() {
|
||||
return getHomePath().resolve("data").resolve("asn-data-raw-table");
|
||||
}
|
||||
|
||||
public static Path getAsnInfoDatabase() {
|
||||
return getHomePath().resolve("data").resolve("asn-used-autnums");
|
||||
}
|
||||
|
||||
public static LanguageModels getLanguageModels() {
|
||||
final Path home = getHomePath();
|
||||
|
||||
return new LanguageModels(
|
||||
home.resolve("model/tfreq-new-algo3.bin"),
|
||||
home.resolve("model/opennlp-sentence.bin"),
|
||||
home.resolve("model/English.RDR"),
|
||||
home.resolve("model/English.DICT"),
|
||||
home.resolve("model/lid.176.ftz"),
|
||||
home.resolve("model/segments.bin")
|
||||
);
|
||||
}
|
||||
|
||||
public static Path getAtagsPath() {
|
||||
return getHomePath().resolve("data/atags.parquet");
|
||||
}
|
||||
|
||||
|
||||
}
|
@@ -0,0 +1,123 @@
|
||||
package nu.marginalia.nodecfg;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import nu.marginalia.nodecfg.model.NodeConfiguration;
|
||||
import nu.marginalia.nodecfg.model.NodeProfile;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class NodeConfigurationService {
|
||||
private final Logger logger = LoggerFactory.getLogger(NodeConfigurationService.class);
|
||||
|
||||
private final HikariDataSource dataSource;
|
||||
|
||||
@Inject
|
||||
public NodeConfigurationService(HikariDataSource dataSource) {
|
||||
this.dataSource = dataSource;
|
||||
}
|
||||
|
||||
public NodeConfiguration create(int id, String description, boolean acceptQueries, boolean keepWarcs, NodeProfile nodeProfile) throws SQLException {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var is = conn.prepareStatement("""
|
||||
INSERT IGNORE INTO NODE_CONFIGURATION(ID, DESCRIPTION, ACCEPT_QUERIES, KEEP_WARCS, NODE_PROFILE) VALUES(?, ?, ?, ?, ?)
|
||||
""")
|
||||
)
|
||||
{
|
||||
is.setInt(1, id);
|
||||
is.setString(2, description);
|
||||
is.setBoolean(3, acceptQueries);
|
||||
is.setBoolean(4, keepWarcs);
|
||||
is.setString(5, nodeProfile.name());
|
||||
|
||||
if (is.executeUpdate() <= 0) {
|
||||
throw new IllegalStateException("Failed to insert configuration");
|
||||
}
|
||||
|
||||
return get(id);
|
||||
}
|
||||
}
|
||||
|
||||
public List<NodeConfiguration> getAll() {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var qs = conn.prepareStatement("""
|
||||
SELECT ID, DESCRIPTION, ACCEPT_QUERIES, AUTO_CLEAN, PRECESSION, KEEP_WARCS, NODE_PROFILE, DISABLED
|
||||
FROM NODE_CONFIGURATION
|
||||
""")) {
|
||||
var rs = qs.executeQuery();
|
||||
|
||||
List<NodeConfiguration> ret = new ArrayList<>();
|
||||
|
||||
while (rs.next()) {
|
||||
ret.add(new NodeConfiguration(
|
||||
rs.getInt("ID"),
|
||||
rs.getString("DESCRIPTION"),
|
||||
rs.getBoolean("ACCEPT_QUERIES"),
|
||||
rs.getBoolean("AUTO_CLEAN"),
|
||||
rs.getBoolean("PRECESSION"),
|
||||
rs.getBoolean("KEEP_WARCS"),
|
||||
NodeProfile.valueOf(rs.getString("NODE_PROFILE")),
|
||||
rs.getBoolean("DISABLED")
|
||||
));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
catch (SQLException ex) {
|
||||
logger.warn("Failed to get node configurations", ex);
|
||||
return List.of();
|
||||
}
|
||||
}
|
||||
|
||||
public NodeConfiguration get(int nodeId) throws SQLException {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var qs = conn.prepareStatement("""
|
||||
SELECT ID, DESCRIPTION, ACCEPT_QUERIES, AUTO_CLEAN, PRECESSION, KEEP_WARCS, NODE_PROFILE, DISABLED
|
||||
FROM NODE_CONFIGURATION
|
||||
WHERE ID=?
|
||||
""")) {
|
||||
qs.setInt(1, nodeId);
|
||||
var rs = qs.executeQuery();
|
||||
if (rs.next()) {
|
||||
return new NodeConfiguration(
|
||||
rs.getInt("ID"),
|
||||
rs.getString("DESCRIPTION"),
|
||||
rs.getBoolean("ACCEPT_QUERIES"),
|
||||
rs.getBoolean("AUTO_CLEAN"),
|
||||
rs.getBoolean("PRECESSION"),
|
||||
rs.getBoolean("KEEP_WARCS"),
|
||||
NodeProfile.valueOf(rs.getString("NODE_PROFILE")),
|
||||
rs.getBoolean("DISABLED")
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public void save(NodeConfiguration config) throws SQLException {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var us = conn.prepareStatement("""
|
||||
UPDATE NODE_CONFIGURATION
|
||||
SET DESCRIPTION=?, ACCEPT_QUERIES=?, AUTO_CLEAN=?, PRECESSION=?, KEEP_WARCS=?, DISABLED=?, NODE_PROFILE=?
|
||||
WHERE ID=?
|
||||
"""))
|
||||
{
|
||||
us.setString(1, config.description());
|
||||
us.setBoolean(2, config.acceptQueries());
|
||||
us.setBoolean(3, config.autoClean());
|
||||
us.setBoolean(4, config.includeInPrecession());
|
||||
us.setBoolean(5, config.keepWarcs());
|
||||
us.setBoolean(6, config.disabled());
|
||||
us.setString(7, config.profile().name());
|
||||
us.setInt(8, config.node());
|
||||
|
||||
if (us.executeUpdate() <= 0)
|
||||
throw new IllegalStateException("Failed to update configuration");
|
||||
|
||||
}
|
||||
}
|
||||
}
|
@@ -0,0 +1,16 @@
|
||||
package nu.marginalia.nodecfg.model;
|
||||
|
||||
public record NodeConfiguration(int node,
|
||||
String description,
|
||||
boolean acceptQueries,
|
||||
boolean autoClean,
|
||||
boolean includeInPrecession,
|
||||
boolean keepWarcs,
|
||||
NodeProfile profile,
|
||||
boolean disabled
|
||||
)
|
||||
{
|
||||
public int getId() {
|
||||
return node;
|
||||
}
|
||||
}
|
@@ -0,0 +1,28 @@
|
||||
package nu.marginalia.nodecfg.model;
|
||||
|
||||
/**
 * The profile assigned to a node. Besides the plain identity checks, the
 * {@code permit*} methods say which operations a profile allows.
 */
public enum NodeProfile {
    BATCH_CRAWL,
    REALTIME,
    MIXED,
    SIDELOAD;

    /** @return true only for {@link #BATCH_CRAWL} */
    public boolean isBatchCrawl() {
        return BATCH_CRAWL == this;
    }

    /** @return true only for {@link #REALTIME} */
    public boolean isRealtime() {
        return REALTIME == this;
    }

    /** @return true only for {@link #MIXED} */
    public boolean isMixed() {
        return MIXED == this;
    }

    /** @return true only for {@link #SIDELOAD} */
    public boolean isSideload() {
        return SIDELOAD == this;
    }

    /** @return true for the profiles that allow batch crawling: {@link #BATCH_CRAWL} and {@link #MIXED} */
    public boolean permitBatchCrawl() {
        return switch (this) {
            case BATCH_CRAWL, MIXED -> true;
            case REALTIME, SIDELOAD -> false;
        };
    }

    /** @return true for the profiles that allow sideloading: {@link #MIXED} and {@link #SIDELOAD} */
    public boolean permitSideload() {
        return switch (this) {
            case MIXED, SIDELOAD -> true;
            case BATCH_CRAWL, REALTIME -> false;
        };
    }
}
|
@@ -0,0 +1,51 @@
|
||||
package nu.marginalia.storage;
|
||||
|
||||
import com.google.gson.Gson;
|
||||
import nu.marginalia.model.gson.GsonFactory;
|
||||
import nu.marginalia.storage.model.FileStorage;
|
||||
import nu.marginalia.storage.model.FileStorageType;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.StandardOpenOption;
|
||||
import java.util.Optional;
|
||||
|
||||
record FileStorageManifest(FileStorageType type, String description) {
|
||||
private static final Gson gson = GsonFactory.get();
|
||||
private static final String fileName = "marginalia-manifest.json";
|
||||
private static final Logger logger = LoggerFactory.getLogger(FileStorageManifest.class);
|
||||
|
||||
public static Optional<FileStorageManifest> find(Path directory) {
|
||||
Path expectedFileName = directory.resolve(fileName);
|
||||
|
||||
if (!Files.isRegularFile(expectedFileName) ||
|
||||
!Files.isReadable(expectedFileName)) {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
try (var reader = Files.newBufferedReader(expectedFileName)) {
|
||||
return Optional.of(gson.fromJson(reader, FileStorageManifest.class));
|
||||
}
|
||||
catch (Exception e) {
|
||||
logger.warn("Failed to read manifest " + expectedFileName, e);
|
||||
return Optional.empty();
|
||||
}
|
||||
}
|
||||
|
||||
public void write(FileStorage dir) {
|
||||
Path expectedFileName = dir.asPath().resolve(fileName);
|
||||
|
||||
try (var writer = Files.newBufferedWriter(expectedFileName,
|
||||
StandardOpenOption.CREATE,
|
||||
StandardOpenOption.TRUNCATE_EXISTING))
|
||||
{
|
||||
gson.toJson(this, writer);
|
||||
}
|
||||
catch (Exception e) {
|
||||
logger.warn("Failed to write manifest " + expectedFileName, e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@@ -0,0 +1,582 @@
|
||||
package nu.marginalia.storage;
|
||||
|
||||
import com.google.inject.name.Named;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import nu.marginalia.storage.model.*;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.*;
|
||||
import java.nio.file.attribute.PosixFilePermissions;
|
||||
import java.sql.SQLException;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.ThreadLocalRandom;
|
||||
|
||||
/** Manages file storage for processes and services
|
||||
*/
|
||||
@Singleton
|
||||
public class FileStorageService {
|
||||
private final HikariDataSource dataSource;
|
||||
private final int node;
|
||||
private final Logger logger = LoggerFactory.getLogger(FileStorageService.class);
|
||||
|
||||
private static final DateTimeFormatter dirNameDatePattern = DateTimeFormatter.ofPattern("__uu-MM-dd'T'HH_mm_ss.SSS"); // filesystem safe ISO8601
|
||||
|
||||
@Inject
|
||||
public FileStorageService(HikariDataSource dataSource,
|
||||
@Named("wmsa-system-node") Integer node) {
|
||||
this.dataSource = dataSource;
|
||||
this.node = node;
|
||||
|
||||
logger.info("Resolving file storage root into {}", resolveStoragePath("/").toAbsolutePath());
|
||||
}
|
||||
|
||||
/** Resolve a storage path from a relative path, injecting the system configured storage root
|
||||
* if set */
|
||||
public static Path resolveStoragePath(String path) {
|
||||
if (path.startsWith("/")) {
|
||||
// Since Path.of("ANYTHING").resolve("/foo") = "/foo", we need to strip
|
||||
// the leading slash
|
||||
return resolveStoragePath(path.substring(1));
|
||||
}
|
||||
|
||||
return Path
|
||||
.of(System.getProperty("storage.root", "/"))
|
||||
.resolve(path);
|
||||
}
|
||||
|
||||
/** @return the storage base with the given id, or null if it does not exist */
|
||||
public FileStorageBase getStorageBase(FileStorageBaseId id) throws SQLException {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
SELECT ID, NAME, NODE, PATH, TYPE
|
||||
FROM FILE_STORAGE_BASE WHERE ID = ?
|
||||
""")) {
|
||||
stmt.setLong(1, id.id());
|
||||
try (var rs = stmt.executeQuery()) {
|
||||
if (rs.next()) {
|
||||
return new FileStorageBase(
|
||||
new FileStorageBaseId(rs.getLong("ID")),
|
||||
FileStorageBaseType.valueOf(rs.getString("TYPE")),
|
||||
rs.getInt("NODE"),
|
||||
rs.getString("NAME"),
|
||||
rs.getString("PATH")
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public void synchronizeStorageManifests(FileStorageBase base) {
|
||||
Set<String> ignoredPaths = new HashSet<>();
|
||||
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
SELECT FILE_STORAGE.PATH
|
||||
FROM FILE_STORAGE INNER JOIN FILE_STORAGE_BASE
|
||||
ON BASE_ID = FILE_STORAGE_BASE.ID
|
||||
WHERE BASE_ID = ?
|
||||
AND NODE = ?
|
||||
""")) {
|
||||
|
||||
stmt.setLong(1, base.id().id());
|
||||
stmt.setInt(2, node);
|
||||
|
||||
var rs = stmt.executeQuery();
|
||||
while (rs.next()) {
|
||||
ignoredPaths.add(rs.getString(1));
|
||||
}
|
||||
} catch (SQLException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
File basePathFile = base.asPath().toFile();
|
||||
File[] files = basePathFile.listFiles(pathname -> pathname.isDirectory() && !ignoredPaths.contains(pathname.getName()));
|
||||
if (files == null) return;
|
||||
for (File file : files) {
|
||||
var maybeManifest = FileStorageManifest.find(file.toPath());
|
||||
if (maybeManifest.isEmpty()) continue;
|
||||
var manifest = maybeManifest.get();
|
||||
|
||||
logger.info("Discovered new file storage: " + file.getName() + " (" + manifest.type() + ")");
|
||||
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
INSERT INTO FILE_STORAGE(BASE_ID, PATH, TYPE, DESCRIPTION)
|
||||
VALUES (?, ?, ?, ?)
|
||||
""")) {
|
||||
stmt.setLong(1, base.id().id());
|
||||
stmt.setString(2, file.getName());
|
||||
stmt.setString(3, manifest.type().name());
|
||||
stmt.setString(4, manifest.description());
|
||||
stmt.execute();
|
||||
conn.commit();
|
||||
|
||||
} catch (SQLException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void relateFileStorages(FileStorageId source, FileStorageId target) {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
INSERT INTO FILE_STORAGE_RELATION(SOURCE_ID, TARGET_ID) VALUES (?, ?)
|
||||
""")) {
|
||||
stmt.setLong(1, source.id());
|
||||
stmt.setLong(2, target.id());
|
||||
stmt.executeUpdate();
|
||||
} catch (SQLException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public List<FileStorage> getSourceFromStorage(FileStorage storage) throws SQLException {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
SELECT SOURCE_ID FROM FILE_STORAGE_RELATION WHERE TARGET_ID = ?
|
||||
""")) {
|
||||
stmt.setLong(1, storage.id().id());
|
||||
var rs = stmt.executeQuery();
|
||||
List<FileStorage> ret = new ArrayList<>();
|
||||
while (rs.next()) {
|
||||
ret.add(getStorage(new FileStorageId(rs.getLong(1))));
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
/** @return the storage base with the given type, or null if it does not exist */
|
||||
public FileStorageBase getStorageBase(FileStorageBaseType type) throws SQLException {
|
||||
return getStorageBase(type, node);
|
||||
}
|
||||
|
||||
public FileStorageBase getStorageBase(FileStorageBaseType type, int node) throws SQLException {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
SELECT ID, NAME, NODE, PATH, TYPE
|
||||
FROM FILE_STORAGE_BASE WHERE TYPE = ? AND NODE = ?
|
||||
""")) {
|
||||
stmt.setString(1, type.name());
|
||||
stmt.setInt(2, node);
|
||||
try (var rs = stmt.executeQuery()) {
|
||||
if (rs.next()) {
|
||||
return new FileStorageBase(
|
||||
new FileStorageBaseId(rs.getLong("ID")),
|
||||
FileStorageBaseType.valueOf(rs.getString("TYPE")),
|
||||
rs.getInt("NODE"),
|
||||
rs.getString("NAME"),
|
||||
rs.getString("PATH")
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
public FileStorageBase createStorageBase(String name, Path path, FileStorageBaseType type) throws SQLException {
|
||||
return createStorageBase(name, path, node, type);
|
||||
}
|
||||
|
||||
public FileStorageBase createStorageBase(String name, Path path, int node, FileStorageBaseType type) throws SQLException {
|
||||
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
INSERT INTO FILE_STORAGE_BASE(NAME, PATH, TYPE, NODE)
|
||||
VALUES (?, ?, ?, ?)
|
||||
""")) {
|
||||
stmt.setString(1, name);
|
||||
stmt.setString(2, path.toString());
|
||||
stmt.setString(3, type.name());
|
||||
stmt.setInt(4, node);
|
||||
|
||||
int update = stmt.executeUpdate();
|
||||
if (update < 0) {
|
||||
throw new SQLException("Failed to create storage base");
|
||||
}
|
||||
}
|
||||
|
||||
return getStorageBase(type);
|
||||
}
|
||||
|
||||
private Path allocateDirectory(Path basePath, String prefix) throws IOException {
|
||||
LocalDateTime now = LocalDateTime.now();
|
||||
String timestampPart = now.format(dirNameDatePattern);
|
||||
Path maybePath = basePath.resolve(prefix + timestampPart);
|
||||
|
||||
try {
|
||||
Files.createDirectory(maybePath,
|
||||
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwxr-xr-x"))
|
||||
);
|
||||
}
|
||||
catch (FileAlreadyExistsException ex) {
|
||||
// in case of a race condition, try again with some random cruft at the end
|
||||
maybePath = basePath.resolve(prefix + timestampPart + "_" + Long.toHexString(ThreadLocalRandom.current().nextLong()));
|
||||
|
||||
Files.createDirectory(maybePath,
|
||||
PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwxr-xr-x"))
|
||||
);
|
||||
}
|
||||
|
||||
// Ensure umask didn't mess with the access permissions
|
||||
Files.setPosixFilePermissions(maybePath, PosixFilePermissions.fromString("rwxr-xr-x"));
|
||||
|
||||
return maybePath;
|
||||
}
|
||||
|
||||
/** Allocate a storage area of the given type */
|
||||
public FileStorage allocateStorage(FileStorageType type,
|
||||
String prefix,
|
||||
String description) throws IOException, SQLException
|
||||
{
|
||||
var base = getStorageBase(FileStorageBaseType.forFileStorageType(type));
|
||||
|
||||
if (null == base)
|
||||
throw new IllegalStateException("No storage base for type " + type + " on node " + node);
|
||||
|
||||
Path newDir = allocateDirectory(base.asPath(), prefix);
|
||||
|
||||
String relDir = base.asPath().relativize(newDir).normalize().toString();
|
||||
|
||||
try (var conn = dataSource.getConnection();
|
||||
var insert = conn.prepareStatement("""
|
||||
INSERT INTO FILE_STORAGE(PATH, TYPE, DESCRIPTION, BASE_ID)
|
||||
VALUES (?, ?, ?, ?)
|
||||
""");
|
||||
var query = conn.prepareStatement("""
|
||||
SELECT ID FROM FILE_STORAGE WHERE PATH = ? AND BASE_ID = ?
|
||||
""")
|
||||
) {
|
||||
insert.setString(1, relDir);
|
||||
insert.setString(2, type.name());
|
||||
insert.setString(3, description);
|
||||
insert.setLong(4, base.id().id());
|
||||
|
||||
if (insert.executeUpdate() < 1) {
|
||||
throw new SQLException("Failed to insert storage");
|
||||
}
|
||||
|
||||
|
||||
query.setString(1, relDir);
|
||||
query.setLong(2, base.id().id());
|
||||
var rs = query.executeQuery();
|
||||
|
||||
if (rs.next()) {
|
||||
var storage = getStorage(new FileStorageId(rs.getLong("ID")));
|
||||
|
||||
// Write a manifest file so we can pick this up later without needing to insert it into DB
|
||||
// (e.g. when loading from outside the system)
|
||||
var manifest = new FileStorageManifest(type, description);
|
||||
manifest.write(storage);
|
||||
|
||||
return storage;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
throw new SQLException("Failed to insert storage");
|
||||
}
|
||||
|
||||
|
||||
public FileStorage getStorageByType(FileStorageType type) throws SQLException {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
SELECT PATH, STATE, DESCRIPTION, ID, BASE_ID, CREATE_DATE
|
||||
FROM FILE_STORAGE_VIEW WHERE TYPE = ? AND NODE = ?
|
||||
""")) {
|
||||
stmt.setString(1, type.name());
|
||||
stmt.setInt(2, node);
|
||||
|
||||
long storageId;
|
||||
long baseId;
|
||||
String path;
|
||||
String state;
|
||||
String description;
|
||||
LocalDateTime createDateTime;
|
||||
|
||||
try (var rs = stmt.executeQuery()) {
|
||||
if (rs.next()) {
|
||||
baseId = rs.getLong("BASE_ID");
|
||||
storageId = rs.getLong("ID");
|
||||
createDateTime = rs.getTimestamp("CREATE_DATE").toLocalDateTime();
|
||||
path = rs.getString("PATH");
|
||||
state = rs.getString("STATE");
|
||||
description = rs.getString("DESCRIPTION");
|
||||
}
|
||||
else {
|
||||
return null;
|
||||
}
|
||||
|
||||
var base = getStorageBase(new FileStorageBaseId(baseId));
|
||||
|
||||
return new FileStorage(
|
||||
new FileStorageId(storageId),
|
||||
base,
|
||||
type,
|
||||
createDateTime,
|
||||
path,
|
||||
FileStorageState.parse(state),
|
||||
description
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public List<FileStorage> getStorage(List<FileStorageId> ids) throws SQLException {
|
||||
List<FileStorage> ret = new ArrayList<>();
|
||||
for (var id : ids) {
|
||||
var storage = getStorage(id);
|
||||
if (storage == null) continue;
|
||||
ret.add(storage);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/** @return the storage with the given id, or null if it does not exist */
|
||||
public FileStorage getStorage(FileStorageId id) throws SQLException {
|
||||
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
SELECT PATH, TYPE, STATE, DESCRIPTION, CREATE_DATE, ID, BASE_ID
|
||||
FROM FILE_STORAGE_VIEW WHERE ID = ?
|
||||
""")) {
|
||||
stmt.setLong(1, id.id());
|
||||
|
||||
long storageId;
|
||||
long baseId;
|
||||
String path;
|
||||
String state;
|
||||
String description;
|
||||
FileStorageType type;
|
||||
LocalDateTime createDateTime;
|
||||
|
||||
try (var rs = stmt.executeQuery()) {
|
||||
if (rs.next()) {
|
||||
baseId = rs.getLong("BASE_ID");
|
||||
storageId = rs.getLong("ID");
|
||||
type = FileStorageType.valueOf(rs.getString("TYPE"));
|
||||
path = rs.getString("PATH");
|
||||
state = rs.getString("STATE");
|
||||
description = rs.getString("DESCRIPTION");
|
||||
createDateTime = rs.getTimestamp("CREATE_DATE").toLocalDateTime();
|
||||
}
|
||||
else {
|
||||
return null;
|
||||
}
|
||||
|
||||
var base = getStorageBase(new FileStorageBaseId(baseId));
|
||||
|
||||
return new FileStorage(
|
||||
new FileStorageId(storageId),
|
||||
base,
|
||||
type,
|
||||
createDateTime,
|
||||
path,
|
||||
FileStorageState.parse(state),
|
||||
description
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void deregisterFileStorage(FileStorageId id) throws SQLException {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
DELETE FROM FILE_STORAGE WHERE ID = ?
|
||||
""")) {
|
||||
stmt.setLong(1, id.id());
|
||||
stmt.executeUpdate();
|
||||
}
|
||||
}
|
||||
|
||||
public List<FileStorage> getEachFileStorage() {
|
||||
List<FileStorage> ret = new ArrayList<>();
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
SELECT PATH, STATE, TYPE, DESCRIPTION, CREATE_DATE, ID, BASE_ID
|
||||
FROM FILE_STORAGE_VIEW
|
||||
WHERE NODE=?
|
||||
""")) {
|
||||
|
||||
stmt.setInt(1, node);
|
||||
|
||||
long storageId;
|
||||
long baseId;
|
||||
String path;
|
||||
String state;
|
||||
String description;
|
||||
LocalDateTime createDateTime;
|
||||
FileStorageType type;
|
||||
|
||||
try (var rs = stmt.executeQuery()) {
|
||||
while (rs.next()) {
|
||||
baseId = rs.getLong("BASE_ID");
|
||||
storageId = rs.getLong("ID");
|
||||
path = rs.getString("PATH");
|
||||
state = rs.getString("STATE");
|
||||
|
||||
try {
|
||||
type = FileStorageType.valueOf(rs.getString("TYPE"));
|
||||
}
|
||||
catch (IllegalArgumentException ex) {
|
||||
logger.warn("Illegal file storage type {} in db", rs.getString("TYPE"));
|
||||
continue;
|
||||
}
|
||||
|
||||
description = rs.getString("DESCRIPTION");
|
||||
createDateTime = rs.getTimestamp("CREATE_DATE").toLocalDateTime();
|
||||
var base = getStorageBase(new FileStorageBaseId(baseId));
|
||||
|
||||
ret.add(new FileStorage(
|
||||
new FileStorageId(storageId),
|
||||
base,
|
||||
type,
|
||||
createDateTime,
|
||||
path,
|
||||
FileStorageState.parse(state),
|
||||
description
|
||||
));
|
||||
}
|
||||
}
|
||||
} catch (SQLException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
public List<FileStorage> getEachFileStorage(FileStorageType type) {
|
||||
return getEachFileStorage(node, type);
|
||||
}
|
||||
|
||||
public List<FileStorage> getEachFileStorage(int node, FileStorageType type) {
|
||||
List<FileStorage> ret = new ArrayList<>();
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
SELECT PATH, STATE, TYPE, DESCRIPTION, CREATE_DATE, ID, BASE_ID
|
||||
FROM FILE_STORAGE_VIEW
|
||||
WHERE NODE=? AND TYPE=?
|
||||
""")) {
|
||||
|
||||
stmt.setInt(1, node);
|
||||
stmt.setString(2, type.name());
|
||||
|
||||
long storageId;
|
||||
long baseId;
|
||||
String path;
|
||||
String state;
|
||||
String description;
|
||||
LocalDateTime createDateTime;
|
||||
|
||||
try (var rs = stmt.executeQuery()) {
|
||||
while (rs.next()) {
|
||||
baseId = rs.getLong("BASE_ID");
|
||||
storageId = rs.getLong("ID");
|
||||
path = rs.getString("PATH");
|
||||
state = rs.getString("STATE");
|
||||
|
||||
description = rs.getString("DESCRIPTION");
|
||||
createDateTime = rs.getTimestamp("CREATE_DATE").toLocalDateTime();
|
||||
var base = getStorageBase(new FileStorageBaseId(baseId));
|
||||
|
||||
ret.add(new FileStorage(
|
||||
new FileStorageId(storageId),
|
||||
base,
|
||||
type,
|
||||
createDateTime,
|
||||
path,
|
||||
FileStorageState.parse(state),
|
||||
description
|
||||
));
|
||||
}
|
||||
}
|
||||
} catch (SQLException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
public void flagFileForDeletion(FileStorageId id) throws SQLException {
|
||||
setFileStorageState(id, FileStorageState.DELETE);
|
||||
}
|
||||
|
||||
public void enableFileStorage(FileStorageId id) throws SQLException {
|
||||
setFileStorageState(id, FileStorageState.ACTIVE);
|
||||
}
|
||||
public void disableFileStorage(FileStorageId id) throws SQLException {
|
||||
setFileStorageState(id, FileStorageState.UNSET);
|
||||
}
|
||||
|
||||
public void setFileStorageState(FileStorageId id, FileStorageState state) throws SQLException {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var flagStmt = conn.prepareStatement("UPDATE FILE_STORAGE SET STATE = ? WHERE ID = ?")) {
|
||||
String value = state == FileStorageState.UNSET ? "" : state.name();
|
||||
flagStmt.setString(1, value);
|
||||
flagStmt.setLong(2, id.id());
|
||||
flagStmt.executeUpdate();
|
||||
}
|
||||
}
|
||||
|
||||
public void disableFileStorageOfType(int nodeId, FileStorageType type) throws SQLException {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var flagStmt = conn.prepareStatement("""
|
||||
UPDATE FILE_STORAGE
|
||||
INNER JOIN FILE_STORAGE_BASE ON BASE_ID=FILE_STORAGE_BASE.ID
|
||||
SET FILE_STORAGE.STATE = ''
|
||||
WHERE FILE_STORAGE.TYPE = ?
|
||||
AND FILE_STORAGE.TYPE = 'ACTIVE'
|
||||
AND FILE_STORAGE_BASE.NODE=?
|
||||
""")) {
|
||||
flagStmt.setString(1, type.name());
|
||||
flagStmt.setInt(2, nodeId);
|
||||
flagStmt.executeUpdate();
|
||||
}
|
||||
}
|
||||
|
||||
public List<FileStorageId> getActiveFileStorages(FileStorageType type) throws SQLException {
|
||||
return getActiveFileStorages(node, type);
|
||||
}
|
||||
public Optional<FileStorageId> getOnlyActiveFileStorage(FileStorageType type) throws SQLException {
|
||||
return getOnlyActiveFileStorage(node, type);
|
||||
}
|
||||
|
||||
public Optional<FileStorageId> getOnlyActiveFileStorage(int nodeId, FileStorageType type) throws SQLException {
|
||||
var storages = getActiveFileStorages(nodeId, type);
|
||||
if (storages.size() > 1) {
|
||||
throw new IllegalStateException("Expected [0,1] instances of FileStorage with type " + type + ", found " + storages.size());
|
||||
}
|
||||
return storages.stream().findFirst();
|
||||
}
|
||||
|
||||
public List<FileStorageId> getActiveFileStorages(int nodeId, FileStorageType type) throws SQLException
|
||||
{
|
||||
|
||||
try (var conn = dataSource.getConnection();
|
||||
var queryStmt = conn.prepareStatement("""
|
||||
SELECT FILE_STORAGE.ID FROM FILE_STORAGE
|
||||
INNER JOIN FILE_STORAGE_BASE ON BASE_ID=FILE_STORAGE_BASE.ID
|
||||
WHERE FILE_STORAGE.TYPE = ?
|
||||
AND STATE='ACTIVE'
|
||||
AND FILE_STORAGE_BASE.NODE=?
|
||||
""")) {
|
||||
queryStmt.setString(1, type.name());
|
||||
queryStmt.setInt(2, nodeId);
|
||||
var rs = queryStmt.executeQuery();
|
||||
List<FileStorageId> ids = new ArrayList<>();
|
||||
while (rs.next()) {
|
||||
ids.add(new FileStorageId(rs.getInt(1)));
|
||||
}
|
||||
return ids;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@@ -0,0 +1,80 @@
|
||||
package nu.marginalia.storage.model;
|
||||
|
||||
import nu.marginalia.storage.FileStorageService;
|
||||
|
||||
import java.nio.file.Path;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Represents a file storage area
|
||||
*
|
||||
* @param id the id of the storage in the database
|
||||
* @param base the base of the storage
|
||||
* @param type the type of data expected
|
||||
* @param path the full path of the storage on disk
|
||||
* @param description a description of the storage
|
||||
*/
|
||||
public record FileStorage (
|
||||
FileStorageId id,
|
||||
FileStorageBase base,
|
||||
FileStorageType type,
|
||||
LocalDateTime createDateTime,
|
||||
String path,
|
||||
FileStorageState state,
|
||||
String description)
|
||||
{
|
||||
|
||||
public int node() {
|
||||
return base.node();
|
||||
}
|
||||
|
||||
public Path asPath() {
|
||||
return FileStorageService.resolveStoragePath(path);
|
||||
}
|
||||
|
||||
|
||||
public boolean isActive() {
|
||||
return FileStorageState.ACTIVE.equals(state);
|
||||
}
|
||||
public boolean isNoState() {
|
||||
return FileStorageState.UNSET.equals(state);
|
||||
}
|
||||
public boolean isDelete() {
|
||||
return FileStorageState.DELETE.equals(state);
|
||||
}
|
||||
public boolean isNew() {
|
||||
return FileStorageState.NEW.equals(state);
|
||||
}
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
|
||||
FileStorage that = (FileStorage) o;
|
||||
|
||||
// Exclude timestamp as it may different due to how the objects
|
||||
// are constructed
|
||||
|
||||
if (!Objects.equals(id, that.id)) return false;
|
||||
if (!Objects.equals(base, that.base)) return false;
|
||||
if (type != that.type) return false;
|
||||
if (!Objects.equals(path, that.path)) return false;
|
||||
return Objects.equals(description, that.description);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int result = id != null ? id.hashCode() : 0;
|
||||
result = 31 * result + (base != null ? base.hashCode() : 0);
|
||||
result = 31 * result + (type != null ? type.hashCode() : 0);
|
||||
result = 31 * result + (path != null ? path.hashCode() : 0);
|
||||
result = 31 * result + (description != null ? description.hashCode() : 0);
|
||||
return result;
|
||||
}
|
||||
|
||||
public String date() {
|
||||
return createDateTime.format(DateTimeFormatter.ISO_LOCAL_DATE_TIME);
|
||||
}
|
||||
}
|
@@ -0,0 +1,30 @@
|
||||
package nu.marginalia.storage.model;
|
||||
|
||||
import nu.marginalia.storage.FileStorageService;
|
||||
|
||||
import java.nio.file.Path;
|
||||
|
||||
/**
|
||||
* Represents a file storage base directory
|
||||
*
|
||||
* @param id the id of the storage base in the database
|
||||
* @param type the type of the storage base
|
||||
* @param name the name of the storage base
|
||||
* @param path the path of the storage base
|
||||
*/
|
||||
public record FileStorageBase(FileStorageBaseId id,
|
||||
FileStorageBaseType type,
|
||||
int node,
|
||||
String name,
|
||||
String path
|
||||
) {
|
||||
|
||||
public Path asPath() {
|
||||
return FileStorageService.resolveStoragePath(path);
|
||||
}
|
||||
|
||||
public boolean isValid() {
|
||||
return id.id() >= 0;
|
||||
}
|
||||
|
||||
}
|
@@ -0,0 +1,8 @@
|
||||
package nu.marginalia.storage.model;
|
||||
|
||||
/**
 * Identifier of a file storage base.
 *
 * @param id the numeric id
 */
public record FileStorageBaseId(long id) {

    /** @return the numeric id in decimal form */
    @Override
    public String toString() {
        return String.valueOf(id);
    }
}
|
@@ -0,0 +1,17 @@
|
||||
package nu.marginalia.storage.model;
|
||||
|
||||
public enum FileStorageBaseType {
|
||||
CURRENT,
|
||||
WORK,
|
||||
STORAGE,
|
||||
BACKUP;
|
||||
|
||||
|
||||
public static FileStorageBaseType forFileStorageType(FileStorageType type) {
|
||||
return switch (type) {
|
||||
case EXPORT, CRAWL_DATA, PROCESSED_DATA, CRAWL_SPEC -> STORAGE;
|
||||
case BACKUP -> BACKUP;
|
||||
};
|
||||
}
|
||||
|
||||
}
|
@@ -0,0 +1,14 @@
|
||||
package nu.marginalia.storage.model;
|
||||
|
||||
/**
 * Identifier of a file storage.
 *
 * @param id the numeric id
 */
public record FileStorageId(long id) {

    /**
     * Parse an id from its decimal string form.
     *
     * @throws NumberFormatException if the string is not a valid long
     */
    public static FileStorageId parse(String str) {
        long parsed = Long.parseLong(str);
        return new FileStorageId(parsed);
    }

    /** Wrap a numeric id. */
    public static FileStorageId of(long storageId) {
        return new FileStorageId(storageId);
    }

    /** @return the numeric id in decimal form */
    @Override
    public String toString() {
        return String.valueOf(id);
    }
}
|
@@ -0,0 +1,15 @@
|
||||
package nu.marginalia.storage.model;
|
||||
|
||||
/** The states a file storage can be in. */
public enum FileStorageState {
    UNSET,
    NEW,
    ACTIVE,
    DELETE;

    /**
     * Convert a stored state string to its enum constant.
     *
     * @param value the stored string; the empty string stands for {@link #UNSET}
     * @return the matching constant
     * @throws IllegalArgumentException if the value names no constant
     * @throws NullPointerException     if the value is null
     */
    public static FileStorageState parse(String value) {
        boolean isEmptyString = value != null && value.isEmpty();
        return isEmptyString ? UNSET : valueOf(value);
    }
}
|
@@ -0,0 +1,11 @@
|
||||
package nu.marginalia.storage.model;
|
||||
|
||||
/** The kinds of data a file storage area can hold. */
public enum FileStorageType {
    /** Deprecated type, kept as an enum constant. */
    @Deprecated
    CRAWL_SPEC,

    CRAWL_DATA,
    PROCESSED_DATA,
    BACKUP,
    EXPORT;
}
|
3
code/common/config/readme.md
Normal file
3
code/common/config/readme.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Config
|
||||
|
||||
This package contains configuration injectables used by the services.
|
@@ -0,0 +1,67 @@
|
||||
package nu.marginalia.nodecfg;
|
||||
|
||||
import com.zaxxer.hikari.HikariConfig;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import nu.marginalia.nodecfg.model.NodeProfile;
|
||||
import nu.marginalia.test.TestMigrationLoader;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Tag;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.parallel.Execution;
|
||||
import org.junit.jupiter.api.parallel.ExecutionMode;
|
||||
import org.testcontainers.containers.MariaDBContainer;
|
||||
import org.testcontainers.junit.jupiter.Container;
|
||||
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||
|
||||
import java.sql.SQLException;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
@Testcontainers
|
||||
@Execution(ExecutionMode.SAME_THREAD)
|
||||
@Tag("slow")
|
||||
public class NodeConfigurationServiceTest {
|
||||
@Container
|
||||
static MariaDBContainer<?> mariaDBContainer = new MariaDBContainer<>("mariadb")
|
||||
.withDatabaseName("WMSA_prod")
|
||||
.withUsername("wmsa")
|
||||
.withPassword("wmsa")
|
||||
.withNetworkAliases("mariadb");
|
||||
|
||||
static HikariDataSource dataSource;
|
||||
static NodeConfigurationService nodeConfigurationService;
|
||||
|
||||
@BeforeAll
|
||||
public static void setup() {
|
||||
HikariConfig config = new HikariConfig();
|
||||
config.setJdbcUrl(mariaDBContainer.getJdbcUrl());
|
||||
config.setUsername("wmsa");
|
||||
config.setPassword("wmsa");
|
||||
|
||||
dataSource = new HikariDataSource(config);
|
||||
|
||||
TestMigrationLoader.flywayMigration(dataSource);
|
||||
|
||||
nodeConfigurationService = new NodeConfigurationService(dataSource);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void test() throws SQLException {
|
||||
var a = nodeConfigurationService.create(1, "Test", false, false, NodeProfile.MIXED);
|
||||
var b = nodeConfigurationService.create(2, "Foo", true, false, NodeProfile.MIXED);
|
||||
|
||||
assertEquals(1, a.node());
|
||||
assertEquals("Test", a.description());
|
||||
assertFalse(a.acceptQueries());
|
||||
|
||||
assertEquals(2, b.node());
|
||||
assertEquals("Foo", b.description());
|
||||
assertTrue(b.acceptQueries());
|
||||
|
||||
var list = nodeConfigurationService.getAll();
|
||||
assertEquals(2, list.size());
|
||||
assertEquals(a, list.get(0));
|
||||
assertEquals(b, list.get(1));
|
||||
|
||||
}
|
||||
}
|
@@ -0,0 +1,162 @@
|
||||
package nu.marginalia.storage;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import com.zaxxer.hikari.HikariConfig;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import nu.marginalia.storage.model.FileStorage;
|
||||
import nu.marginalia.storage.model.FileStorageBase;
|
||||
import nu.marginalia.storage.model.FileStorageBaseType;
|
||||
import nu.marginalia.storage.model.FileStorageType;
|
||||
import nu.marginalia.test.TestMigrationLoader;
|
||||
import org.junit.jupiter.api.*;
|
||||
import org.junit.jupiter.api.parallel.Execution;
|
||||
import org.junit.jupiter.api.parallel.ExecutionMode;
|
||||
import org.testcontainers.containers.MariaDBContainer;
|
||||
import org.testcontainers.junit.jupiter.Container;
|
||||
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.UUID;
|
||||
|
||||
@Testcontainers
|
||||
@Execution(ExecutionMode.SAME_THREAD)
|
||||
@Tag("slow")
|
||||
public class FileStorageServiceTest {
|
||||
@Container
|
||||
static MariaDBContainer<?> mariaDBContainer = new MariaDBContainer<>("mariadb")
|
||||
.withDatabaseName("WMSA_prod")
|
||||
.withUsername("wmsa")
|
||||
.withPassword("wmsa")
|
||||
.withNetworkAliases("mariadb");
|
||||
|
||||
static HikariDataSource dataSource;
|
||||
static FileStorageService fileStorageService;
|
||||
|
||||
static List<Path> tempDirs = new ArrayList<>();
|
||||
|
||||
@BeforeAll
|
||||
public static void setup() {
|
||||
HikariConfig config = new HikariConfig();
|
||||
config.setJdbcUrl(mariaDBContainer.getJdbcUrl());
|
||||
config.setUsername("wmsa");
|
||||
config.setPassword("wmsa");
|
||||
|
||||
dataSource = new HikariDataSource(config);
|
||||
|
||||
TestMigrationLoader.flywayMigration(dataSource);
|
||||
}
|
||||
|
||||
|
||||
@BeforeEach
|
||||
public void setupEach() {
|
||||
fileStorageService = new FileStorageService(dataSource, 0);
|
||||
}
|
||||
|
||||
@AfterEach
|
||||
public void tearDownEach() {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.createStatement()) {
|
||||
stmt.execute("DELETE FROM FILE_STORAGE");
|
||||
stmt.execute("DELETE FROM FILE_STORAGE_BASE");
|
||||
} catch (SQLException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
@AfterAll
|
||||
public static void teardown() {
|
||||
dataSource.close();
|
||||
|
||||
Lists.reverse(tempDirs).forEach(path -> {
|
||||
try {
|
||||
System.out.println("Deleting " + path);
|
||||
Files.delete(path);
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private Path createTempDir() {
|
||||
try {
|
||||
Path dir = Files.createTempDirectory("file-storage-test");
|
||||
tempDirs.add(dir);
|
||||
return dir;
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testPathOverride() {
|
||||
try {
|
||||
System.setProperty("storage.root", "/tmp");
|
||||
|
||||
var path = new FileStorageBase(null, null, 0, null, "test").asPath();
|
||||
Assertions.assertEquals(Path.of("/tmp/test"), path);
|
||||
}
|
||||
finally {
|
||||
System.clearProperty("storage.root");
|
||||
}
|
||||
}
|
||||
@Test
|
||||
public void testPathOverride3() {
|
||||
try {
|
||||
System.setProperty("storage.root", "/tmp");
|
||||
|
||||
var path = new FileStorageBase(null, null, 0, null, "/test").asPath();
|
||||
Assertions.assertEquals(Path.of("/tmp/test"), path);
|
||||
}
|
||||
finally {
|
||||
System.clearProperty("storage.root");
|
||||
}
|
||||
}
|
||||
@Test
|
||||
public void testPathOverride2() {
|
||||
try {
|
||||
System.setProperty("storage.root", "/tmp");
|
||||
|
||||
var path = new FileStorage(null, null, null, null, "test", null, null).asPath();
|
||||
|
||||
Assertions.assertEquals(Path.of("/tmp/test"), path);
|
||||
}
|
||||
finally {
|
||||
System.clearProperty("storage.root");
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCreateBase() throws SQLException {
|
||||
String name = "test-" + UUID.randomUUID();
|
||||
|
||||
var storage = new FileStorageService(dataSource, 0);
|
||||
var base = storage.createStorageBase(name, createTempDir(), FileStorageBaseType.WORK);
|
||||
|
||||
Assertions.assertEquals(name, base.name());
|
||||
Assertions.assertEquals(FileStorageBaseType.WORK, base.type());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAllocateTemp() throws IOException, SQLException {
|
||||
String name = "test-" + UUID.randomUUID();
|
||||
|
||||
// ensure a base exists
|
||||
var base = fileStorageService.createStorageBase(name, createTempDir(), FileStorageBaseType.STORAGE);
|
||||
tempDirs.add(base.asPath());
|
||||
|
||||
var storage = new FileStorageService(dataSource, 0);
|
||||
|
||||
var fileStorage = storage.allocateStorage(FileStorageType.CRAWL_DATA, "xyz", "thisShouldSucceed");
|
||||
System.out.println("Allocated " + fileStorage.asPath());
|
||||
Assertions.assertTrue(Files.exists(fileStorage.asPath()));
|
||||
tempDirs.add(fileStorage.asPath());
|
||||
}
|
||||
|
||||
|
||||
}
|
72
code/common/db/build.gradle
Normal file
72
code/common/db/build.gradle
Normal file
@@ -0,0 +1,72 @@
|
||||
|
||||
buildscript {
|
||||
repositories {
|
||||
mavenCentral()
|
||||
}
|
||||
dependencies {
|
||||
classpath 'org.flywaydb:flyway-mysql:10.0.1'
|
||||
}
|
||||
}
|
||||
|
||||
plugins {
|
||||
id 'java'
|
||||
|
||||
id 'jvm-test-suite'
|
||||
id "org.flywaydb.flyway" version "10.0.1"
|
||||
}
|
||||
|
||||
java {
|
||||
toolchain {
|
||||
languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
configurations {
|
||||
flywayMigration.extendsFrom(implementation)
|
||||
}
|
||||
|
||||
apply from: "$rootProject.projectDir/srcsets.gradle"
|
||||
|
||||
dependencies {
|
||||
implementation project(':code:common:model')
|
||||
|
||||
implementation libs.bundles.slf4j
|
||||
|
||||
implementation libs.guava
|
||||
implementation dependencies.create(libs.guice.get()) {
|
||||
exclude group: 'com.google.guava'
|
||||
}
|
||||
implementation libs.bundles.gson
|
||||
|
||||
implementation libs.notnull
|
||||
|
||||
implementation libs.commons.lang3
|
||||
|
||||
implementation libs.trove
|
||||
|
||||
implementation libs.bundles.mariadb
|
||||
flywayMigration 'org.flywaydb:flyway-mysql:10.0.1'
|
||||
|
||||
testImplementation libs.bundles.slf4j.test
|
||||
testImplementation libs.bundles.junit
|
||||
testImplementation libs.mockito
|
||||
|
||||
|
||||
testImplementation platform('org.testcontainers:testcontainers-bom:1.17.4')
|
||||
testImplementation libs.commons.codec
|
||||
testImplementation 'org.testcontainers:mariadb:1.17.4'
|
||||
testImplementation 'org.testcontainers:junit-jupiter:1.17.4'
|
||||
testImplementation project(':code:libraries:test-helpers')
|
||||
}
|
||||
|
||||
flyway {
|
||||
url = 'jdbc:mariadb://localhost:3306/WMSA_prod'
|
||||
user = 'wmsa'
|
||||
password = 'wmsa'
|
||||
schemas = ['WMSA_prod']
|
||||
configurations = [ 'compileClasspath', 'flywayMigration' ]
|
||||
locations = ['filesystem:src/main/resources/db/migration']
|
||||
cleanDisabled = false
|
||||
}
|
||||
|
179
code/common/db/java/nu/marginalia/db/DbDomainQueries.java
Normal file
179
code/common/db/java/nu/marginalia/db/DbDomainQueries.java
Normal file
@@ -0,0 +1,179 @@
|
||||
package nu.marginalia.db;
|
||||
|
||||
|
||||
import com.google.common.cache.Cache;
|
||||
import com.google.common.cache.CacheBuilder;
|
||||
import com.google.common.util.concurrent.UncheckedExecutionException;
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.sql.SQLException;
|
||||
import java.util.*;
|
||||
import java.util.concurrent.ExecutionException;
|
||||
|
||||
@Singleton
|
||||
public class DbDomainQueries {
|
||||
private final HikariDataSource dataSource;
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(DbDomainQueries.class);
|
||||
|
||||
private final Cache<EdgeDomain, Integer> domainIdCache = CacheBuilder.newBuilder().maximumSize(10_000).build();
|
||||
private final Cache<EdgeDomain, DomainIdWithNode> domainWithNodeCache = CacheBuilder.newBuilder().maximumSize(10_000).build();
|
||||
private final Cache<Integer, EdgeDomain> domainNameCache = CacheBuilder.newBuilder().maximumSize(10_000).build();
|
||||
private final Cache<String, List<DomainWithNode>> siblingsCache = CacheBuilder.newBuilder().maximumSize(10_000).build();
|
||||
|
||||
/**
 * @param dataSource connection pool used for all EC_DOMAIN lookups made by this class
 */
@Inject
public DbDomainQueries(HikariDataSource dataSource) {
    this.dataSource = dataSource;
}
|
||||
|
||||
|
||||
public Integer getDomainId(EdgeDomain domain) throws NoSuchElementException {
|
||||
try {
|
||||
return domainIdCache.get(domain, () -> {
|
||||
try (var connection = dataSource.getConnection();
|
||||
var stmt = connection.prepareStatement("SELECT ID FROM EC_DOMAIN WHERE DOMAIN_NAME=?")) {
|
||||
|
||||
stmt.setString(1, domain.toString());
|
||||
var rsp = stmt.executeQuery();
|
||||
if (rsp.next()) {
|
||||
return rsp.getInt(1);
|
||||
}
|
||||
}
|
||||
catch (SQLException ex) {
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
|
||||
throw new NoSuchElementException();
|
||||
});
|
||||
}
|
||||
catch (UncheckedExecutionException ex) {
|
||||
throw new NoSuchElementException();
|
||||
}
|
||||
catch (ExecutionException ex) {
|
||||
throw new RuntimeException(ex.getCause());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public DomainIdWithNode getDomainIdWithNode(EdgeDomain domain) throws NoSuchElementException {
|
||||
try {
|
||||
return domainWithNodeCache.get(domain, () -> {
|
||||
try (var connection = dataSource.getConnection();
|
||||
var stmt = connection.prepareStatement("SELECT ID, NODE_AFFINITY FROM EC_DOMAIN WHERE DOMAIN_NAME=?")) {
|
||||
|
||||
stmt.setString(1, domain.toString());
|
||||
var rsp = stmt.executeQuery();
|
||||
if (rsp.next()) {
|
||||
return new DomainIdWithNode(rsp.getInt(1), rsp.getInt(2));
|
||||
}
|
||||
}
|
||||
catch (SQLException ex) {
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
|
||||
throw new NoSuchElementException();
|
||||
});
|
||||
}
|
||||
catch (UncheckedExecutionException ex) {
|
||||
throw new NoSuchElementException();
|
||||
}
|
||||
catch (ExecutionException ex) {
|
||||
throw new RuntimeException(ex.getCause());
|
||||
}
|
||||
}
|
||||
|
||||
public OptionalInt tryGetDomainId(EdgeDomain domain) {
|
||||
|
||||
Integer maybeId = domainIdCache.getIfPresent(domain);
|
||||
if (maybeId != null) {
|
||||
return OptionalInt.of(maybeId);
|
||||
}
|
||||
|
||||
try (var connection = dataSource.getConnection()) {
|
||||
|
||||
try (var stmt = connection.prepareStatement("SELECT ID FROM EC_DOMAIN WHERE DOMAIN_NAME=?")) {
|
||||
stmt.setString(1, domain.toString());
|
||||
var rsp = stmt.executeQuery();
|
||||
if (rsp.next()) {
|
||||
var id = rsp.getInt(1);
|
||||
|
||||
domainIdCache.put(domain, id);
|
||||
return OptionalInt.of(id);
|
||||
}
|
||||
}
|
||||
return OptionalInt.empty();
|
||||
}
|
||||
catch (UncheckedExecutionException ex) {
|
||||
throw new RuntimeException(ex.getCause());
|
||||
}
|
||||
catch (SQLException ex) {
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
}
|
||||
|
||||
public Optional<EdgeDomain> getDomain(int id) {
|
||||
|
||||
EdgeDomain existing = domainNameCache.getIfPresent(id);
|
||||
if (existing != null) {
|
||||
return Optional.of(existing);
|
||||
}
|
||||
|
||||
try (var connection = dataSource.getConnection()) {
|
||||
try (var stmt = connection.prepareStatement("SELECT DOMAIN_NAME FROM EC_DOMAIN WHERE ID=?")) {
|
||||
stmt.setInt(1, id);
|
||||
var rsp = stmt.executeQuery();
|
||||
if (rsp.next()) {
|
||||
var val = new EdgeDomain(rsp.getString(1));
|
||||
domainNameCache.put(id, val);
|
||||
return Optional.of(val);
|
||||
}
|
||||
return Optional.empty();
|
||||
}
|
||||
}
|
||||
catch (SQLException ex) {
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
}
|
||||
|
||||
public List<DomainWithNode> otherSubdomains(EdgeDomain domain, int cnt) throws ExecutionException {
|
||||
String topDomain = domain.topDomain;
|
||||
|
||||
return siblingsCache.get(topDomain, () -> {
|
||||
List<DomainWithNode> ret = new ArrayList<>();
|
||||
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("SELECT DOMAIN_NAME, NODE_AFFINITY FROM EC_DOMAIN WHERE DOMAIN_TOP = ? LIMIT ?")) {
|
||||
stmt.setString(1, topDomain);
|
||||
stmt.setInt(2, cnt);
|
||||
|
||||
var rs = stmt.executeQuery();
|
||||
while (rs.next()) {
|
||||
var sibling = new EdgeDomain(rs.getString(1));
|
||||
|
||||
if (sibling.equals(domain))
|
||||
continue;
|
||||
|
||||
ret.add(new DomainWithNode(sibling, rs.getInt(2)));
|
||||
}
|
||||
} catch (SQLException e) {
|
||||
logger.error("Failed to get domain neighbors");
|
||||
}
|
||||
return ret;
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
public record DomainWithNode (EdgeDomain domain, int nodeAffinity) {
|
||||
public boolean isIndexed() {
|
||||
return nodeAffinity > 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * The id of a domain paired with the NODE_AFFINITY value of its EC_DOMAIN row.
 *
 * @param domainId     value of the ID column
 * @param nodeAffinity value of the NODE_AFFINITY column
 */
public record DomainIdWithNode(int domainId, int nodeAffinity) {
}
|
||||
}
|
13
code/common/db/java/nu/marginalia/db/DomainBlacklist.java
Normal file
13
code/common/db/java/nu/marginalia/db/DomainBlacklist.java
Normal file
@@ -0,0 +1,13 @@
|
||||
package nu.marginalia.db;
|
||||
|
||||
import com.google.inject.ImplementedBy;
|
||||
import gnu.trove.set.hash.TIntHashSet;
|
||||
|
||||
@ImplementedBy(DomainBlacklistImpl.class)
|
||||
public interface DomainBlacklist {
|
||||
boolean isBlacklisted(int domainId);
|
||||
default TIntHashSet getSpamDomains() {
|
||||
return new TIntHashSet();
|
||||
}
|
||||
void waitUntilLoaded() throws InterruptedException;
|
||||
}
|
126
code/common/db/java/nu/marginalia/db/DomainBlacklistImpl.java
Normal file
126
code/common/db/java/nu/marginalia/db/DomainBlacklistImpl.java
Normal file
@@ -0,0 +1,126 @@
|
||||
package nu.marginalia.db;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import gnu.trove.set.hash.TIntHashSet;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.sql.SQLException;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
@Singleton
|
||||
public class DomainBlacklistImpl implements DomainBlacklist {
|
||||
private final boolean blacklistDisabled = Boolean.getBoolean("blacklist.disable");
|
||||
|
||||
private final HikariDataSource dataSource;
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
|
||||
private volatile TIntHashSet spamDomainSet = new TIntHashSet();
|
||||
private volatile boolean isLoaded = false;
|
||||
|
||||
/**
 * Stores the data source and starts a daemon platform thread named
 * "BlacklistUpdater" that runs {@code updateSpamList()}.
 *
 * @param dataSource connection pool the blacklist is read from
 */
@Inject
public DomainBlacklistImpl(HikariDataSource dataSource) {
    this.dataSource = dataSource;

    Thread.ofPlatform()
            .daemon()
            .name("BlacklistUpdater")
            .start(this::updateSpamList);
}
|
||||
|
||||
private void updateSpamList() {
|
||||
// If the blacklist is disabled, we don't need to do anything
|
||||
if (blacklistDisabled) {
|
||||
isLoaded = true;
|
||||
|
||||
flagLoaded();
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
spamDomainSet = getSpamDomains();
|
||||
|
||||
// Set the flag to true after the first loading attempt, regardless of success,
|
||||
// to avoid deadlocking threads that are waiting for this condition
|
||||
flagLoaded();
|
||||
|
||||
// Sleep for 10 minutes before trying again
|
||||
try {
|
||||
TimeUnit.MINUTES.sleep(10);
|
||||
}
|
||||
catch (InterruptedException ex) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private void flagLoaded() {
|
||||
if (!isLoaded) {
|
||||
synchronized (this) {
|
||||
isLoaded = true;
|
||||
notifyAll();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
/** Block until the blacklist has been loaded */
|
||||
@Override
|
||||
public void waitUntilLoaded() throws InterruptedException {
|
||||
if (blacklistDisabled)
|
||||
return;
|
||||
|
||||
if (!isLoaded) {
|
||||
logger.info("Waiting for blacklist to be loaded");
|
||||
synchronized (this) {
|
||||
while (!isLoaded) {
|
||||
wait(5000);
|
||||
}
|
||||
}
|
||||
logger.info("Blacklist loaded, size = {}", spamDomainSet.size());
|
||||
}
|
||||
}
|
||||
|
||||
public TIntHashSet getSpamDomains() {
|
||||
final TIntHashSet result = new TIntHashSet(1_000_000);
|
||||
|
||||
if (blacklistDisabled) {
|
||||
return result;
|
||||
}
|
||||
|
||||
try (var connection = dataSource.getConnection()) {
|
||||
try (var stmt = connection.prepareStatement("""
|
||||
SELECT EC_DOMAIN.ID
|
||||
FROM EC_DOMAIN
|
||||
INNER JOIN EC_DOMAIN_BLACKLIST
|
||||
ON (EC_DOMAIN_BLACKLIST.URL_DOMAIN = EC_DOMAIN.DOMAIN_TOP
|
||||
OR EC_DOMAIN_BLACKLIST.URL_DOMAIN = EC_DOMAIN.DOMAIN_NAME)
|
||||
"""))
|
||||
{
|
||||
stmt.setFetchSize(1000);
|
||||
var rsp = stmt.executeQuery();
|
||||
while (rsp.next()) {
|
||||
result.add(rsp.getInt(1));
|
||||
}
|
||||
}
|
||||
} catch (SQLException ex) {
|
||||
logger.error("Failed to load spam domain list", ex);
|
||||
}
|
||||
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isBlacklisted(int domainId) {
|
||||
|
||||
if (spamDomainSet.contains(domainId)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
@@ -0,0 +1,162 @@
|
||||
package nu.marginalia.db;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.nio.file.Path;
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
public class DomainRankingSetsService {
|
||||
private static final Logger logger = LoggerFactory.getLogger(DomainRankingSetsService.class);
|
||||
private final HikariDataSource dataSource;
|
||||
|
||||
/**
 * @param dataSource connection pool used to read and write CONF_DOMAIN_RANKING_SET
 */
@Inject
public DomainRankingSetsService(HikariDataSource dataSource) {
    this.dataSource = dataSource;
}
|
||||
|
||||
public Optional<DomainRankingSet> get(String name) throws SQLException {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
SELECT NAME, DESCRIPTION, DEPTH, DEFINITION
|
||||
FROM CONF_DOMAIN_RANKING_SET
|
||||
WHERE NAME = ?
|
||||
""")) {
|
||||
stmt.setString(1, name);
|
||||
var rs = stmt.executeQuery();
|
||||
|
||||
if (!rs.next()) {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
return Optional.of(new DomainRankingSet(
|
||||
rs.getString("NAME"),
|
||||
rs.getString("DESCRIPTION"),
|
||||
rs.getInt("DEPTH"),
|
||||
rs.getString("DEFINITION")
|
||||
));
|
||||
}
|
||||
catch (SQLException ex) {
|
||||
logger.error("Failed to get domain set", ex);
|
||||
return Optional.empty();
|
||||
}
|
||||
}
|
||||
|
||||
public void upsert(DomainRankingSet domainRankingSet) {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
REPLACE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, DEPTH, DEFINITION)
|
||||
VALUES (?, ?, ?, ?)
|
||||
"""))
|
||||
{
|
||||
stmt.setString(1, domainRankingSet.name());
|
||||
stmt.setString(2, domainRankingSet.description());
|
||||
stmt.setInt(3, domainRankingSet.depth());
|
||||
stmt.setString(4, domainRankingSet.definition());
|
||||
stmt.executeUpdate();
|
||||
|
||||
if (!conn.getAutoCommit())
|
||||
conn.commit();
|
||||
}
|
||||
catch (SQLException ex) {
|
||||
logger.error("Failed to update domain set", ex);
|
||||
}
|
||||
}
|
||||
|
||||
public void delete(DomainRankingSet domainRankingSet) {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
DELETE FROM CONF_DOMAIN_RANKING_SET
|
||||
WHERE NAME = ?
|
||||
"""))
|
||||
{
|
||||
stmt.setString(1, domainRankingSet.name());
|
||||
stmt.executeUpdate();
|
||||
|
||||
if (!conn.getAutoCommit())
|
||||
conn.commit();
|
||||
}
|
||||
catch (SQLException ex) {
|
||||
logger.error("Failed to delete domain set", ex);
|
||||
}
|
||||
}
|
||||
|
||||
public List<DomainRankingSet> getAll() {
|
||||
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
SELECT NAME, DESCRIPTION, DEPTH, DEFINITION
|
||||
FROM CONF_DOMAIN_RANKING_SET
|
||||
""")) {
|
||||
var rs = stmt.executeQuery();
|
||||
List<DomainRankingSet> ret = new ArrayList<>();
|
||||
|
||||
while (rs.next()) {
|
||||
ret.add(
|
||||
new DomainRankingSet(
|
||||
rs.getString("NAME"),
|
||||
rs.getString("DESCRIPTION"),
|
||||
rs.getInt("DEPTH"),
|
||||
rs.getString("DEFINITION"))
|
||||
);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
catch (SQLException ex) {
|
||||
logger.error("Failed to get domain set", ex);
|
||||
return List.of();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Defines a domain ranking set, parameters for the ranking algorithms.
 *
 * @param name Key and name of the set
 * @param description Human-readable description
 * @param depth Depth of the algorithm
 * @param definition Definition of the set, typically a list of domains or globs for domain-names
 */
public record DomainRankingSet(String name,
                               String description,
                               int depth,
                               String definition) {

    /**
     * @param base directory the file lives in
     * @return {@code base/<lower-cased name>.dat}; the name is lower-cased with the root
     *         locale so the file name does not depend on the JVM's default locale
     */
    public Path fileName(Path base) {
        return base.resolve(name().toLowerCase(java.util.Locale.ROOT) + ".dat");
    }

    /**
     * Splits the definition into its entries, one per line.
     *
     * @return the trimmed, non-blank lines of the definition that do not start with '#'
     */
    public String[] domains() {
        return Arrays.stream(definition().split("\n+"))
                .map(String::trim)
                .filter(s -> !s.isBlank())
                .filter(s -> !s.startsWith("#"))
                .toArray(String[]::new);
    }

    /** @return true if the set is named BLOGS, NONE or RANK */
    public boolean isSpecial() {
        return name().equals("BLOGS") || name().equals("NONE") || name().equals("RANK");
    }

    // The with-methods compare by value: reference comparison of Strings would
    // needlessly copy the record whenever an equal but distinct String is passed.

    /** @return this record if the name is unchanged, otherwise a copy with the new name */
    public DomainRankingSet withName(String name) {
        return java.util.Objects.equals(this.name, name) ? this : new DomainRankingSet(name, description, depth, definition);
    }

    /** @return this record if the description is unchanged, otherwise a copy with the new description */
    public DomainRankingSet withDescription(String description) {
        return java.util.Objects.equals(this.description, description) ? this : new DomainRankingSet(name, description, depth, definition);
    }

    /** @return this record if the depth is unchanged, otherwise a copy with the new depth */
    public DomainRankingSet withDepth(int depth) {
        return this.depth == depth ? this : new DomainRankingSet(name, description, depth, definition);
    }

    /** @return this record if the definition is unchanged, otherwise a copy with the new definition */
    public DomainRankingSet withDefinition(String definition) {
        return java.util.Objects.equals(this.definition, definition) ? this : new DomainRankingSet(name, description, depth, definition);
    }
}
|
||||
}
|
217
code/common/db/java/nu/marginalia/db/DomainTypes.java
Normal file
217
code/common/db/java/nu/marginalia/db/DomainTypes.java
Normal file
@@ -0,0 +1,217 @@
|
||||
package nu.marginalia.db;
|
||||
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import gnu.trove.list.TIntList;
|
||||
import gnu.trove.list.array.TIntArrayList;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.slf4j.Logger;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import java.io.BufferedReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.net.URL;
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/** A list of domains that are known to be of a certain type */
|
||||
@Singleton
|
||||
public class DomainTypes {
|
||||
|
||||
/**
 * The known domain selection types. The constant's {@code name()} is what is matched
 * against DOMAIN_SELECTION_TYPE.NAME in the queries of this class.
 */
public enum Type {
    BLOG, CRAWL, TEST
}
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(DomainTypes.class);
|
||||
|
||||
private final HikariDataSource dataSource;
|
||||
|
||||
/**
 * @param dataSource connection pool used for the DOMAIN_SELECTION and DOMAIN_SELECTION_TYPE queries
 */
@Inject
public DomainTypes(HikariDataSource dataSource) {
    this.dataSource = dataSource;
}
|
||||
|
||||
public String getUrlForSelection(Type type) {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var qs = conn.prepareStatement("SELECT SOURCE FROM DOMAIN_SELECTION_TYPE WHERE NAME = ?"))
|
||||
{
|
||||
qs.setString(1, type.name());
|
||||
var rs = qs.executeQuery();
|
||||
if (rs.next()) {
|
||||
return rs.getString("SOURCE");
|
||||
}
|
||||
}
|
||||
catch (SQLException ex) {
|
||||
ex.printStackTrace();
|
||||
}
|
||||
|
||||
return "";
|
||||
}
|
||||
|
||||
public void updateUrlForSelection(Type type, String newValue) throws SQLException {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var us = conn.prepareStatement("REPLACE INTO DOMAIN_SELECTION_TYPE(NAME, SOURCE) VALUES (?, ?)")) {
|
||||
us.setString(1, type.name());
|
||||
us.setString(2, newValue);
|
||||
us.executeUpdate();
|
||||
}
|
||||
}
|
||||
|
||||
/** Get all domains of a certain type, including domains that are not in the EC_DOMAIN table */
|
||||
public List<String> getAllDomainsByType(Type type) {
|
||||
List<String> ret = new ArrayList<>();
|
||||
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
SELECT DOMAIN_NAME
|
||||
FROM DOMAIN_SELECTION INNER JOIN DOMAIN_SELECTION_TYPE ON DOMAIN_TYPE_ID = DOMAIN_SELECTION_TYPE.ID
|
||||
WHERE DOMAIN_SELECTION_TYPE.NAME = ?
|
||||
"""))
|
||||
{
|
||||
stmt.setString(1, type.name());
|
||||
var rs = stmt.executeQuery();
|
||||
while (rs.next()) {
|
||||
ret.add(rs.getString(1));
|
||||
}
|
||||
}
|
||||
catch (SQLException ex) {
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/** Retrieve the domain id of all domains of a certain type,
|
||||
* ignoring entries that are not in the EC_DOMAIN table */
|
||||
public TIntList getKnownDomainsByType(Type type) {
|
||||
TIntList ret = new TIntArrayList();
|
||||
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
SELECT EC_DOMAIN.ID
|
||||
FROM DOMAIN_SELECTION
|
||||
INNER JOIN DOMAIN_SELECTION_TYPE ON DOMAIN_TYPE_ID = DOMAIN_SELECTION_TYPE.ID
|
||||
INNER JOIN EC_DOMAIN ON DOMAIN_SELECTION.DOMAIN_NAME = EC_DOMAIN.DOMAIN_NAME
|
||||
WHERE DOMAIN_SELECTION_TYPE.NAME = ?
|
||||
"""))
|
||||
{
|
||||
stmt.setString(1, type.name());
|
||||
var rs = stmt.executeQuery();
|
||||
while (rs.next()) {
|
||||
ret.add(rs.getInt(1));
|
||||
}
|
||||
}
|
||||
catch (SQLException ex) {
|
||||
throw new RuntimeException(ex);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/** Reload the list of domains of a certain type from the source */
|
||||
public void reloadDomainsList(Type type) throws IOException, SQLException {
|
||||
try (var conn = dataSource.getConnection();
|
||||
var stmt = conn.prepareStatement("""
|
||||
SELECT SOURCE, ID FROM DOMAIN_SELECTION_TYPE WHERE NAME = ?
|
||||
""");
|
||||
var deleteStatement = conn.prepareStatement("""
|
||||
DELETE FROM DOMAIN_SELECTION WHERE DOMAIN_TYPE_ID = ?
|
||||
""");
|
||||
var insertStatement = conn.prepareStatement("""
|
||||
INSERT IGNORE INTO DOMAIN_SELECTION (DOMAIN_NAME, DOMAIN_TYPE_ID) VALUES (?, ?)
|
||||
""")
|
||||
)
|
||||
{
|
||||
stmt.setString(1, type.name());
|
||||
var rsp = stmt.executeQuery();
|
||||
|
||||
if (!rsp.next()) {
|
||||
throw new RuntimeException("No such domain selection type: " + type);
|
||||
}
|
||||
|
||||
var source = rsp.getString(1);
|
||||
int typeId = rsp.getInt(2);
|
||||
|
||||
List<String> downloadDomains = downloadDomainsList(source);
|
||||
|
||||
try {
|
||||
conn.setAutoCommit(false);
|
||||
deleteStatement.setInt(1, typeId);
|
||||
deleteStatement.executeUpdate();
|
||||
|
||||
for (String domain : downloadDomains) {
|
||||
insertStatement.setString(1, domain);
|
||||
insertStatement.setInt(2, typeId);
|
||||
insertStatement.executeUpdate();
|
||||
// Could use batch insert here, but this executes infrequently, so it's not worth the hassle
|
||||
}
|
||||
|
||||
conn.commit();
|
||||
}
|
||||
catch (SQLException ex) {
|
||||
conn.rollback();
|
||||
throw ex;
|
||||
}
|
||||
finally {
|
||||
conn.setAutoCommit(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public List<String> downloadList(Type type) throws IOException {
|
||||
var url = getUrlForSelection(type);
|
||||
if (url.isBlank())
|
||||
return List.of();
|
||||
return downloadDomainsList(url);
|
||||
}
|
||||
|
||||
|
||||
private List<String> downloadDomainsList(String source) throws IOException {
|
||||
if (source.isBlank())
|
||||
return List.of();
|
||||
|
||||
List<String> ret = new ArrayList<>();
|
||||
|
||||
logger.info("Downloading domain list from {}", source);
|
||||
|
||||
try (var br = new BufferedReader(new InputStreamReader(new URL(source).openStream()))) {
|
||||
String line;
|
||||
|
||||
while ((line = br.readLine()) != null) {
|
||||
line = cleanDomainListLine(line);
|
||||
|
||||
|
||||
if (isValidDomainListEntry(line))
|
||||
ret.add(line);
|
||||
}
|
||||
}
|
||||
|
||||
logger.info("-- found {}", ret.size());
|
||||
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
private String cleanDomainListLine(String line) {
|
||||
line = line.trim();
|
||||
|
||||
int hashIdx = line.indexOf('#');
|
||||
if (hashIdx >= 0)
|
||||
line = line.substring(0, hashIdx).trim();
|
||||
|
||||
return line;
|
||||
}
|
||||
|
||||
private boolean isValidDomainListEntry(String line) {
|
||||
if (line.isBlank())
|
||||
return false;
|
||||
if (!line.matches("[a-z0-9\\-.]+"))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
31
code/common/db/readme.md
Normal file
31
code/common/db/readme.md
Normal file
@@ -0,0 +1,31 @@
|
||||
# DB
|
||||
|
||||
This module primarily contains SQL files for the URLs database. The most central tables are `EC_DOMAIN`, `EC_URL` and `EC_PAGE_DATA`.
|
||||
|
||||
## Flyway
|
||||
|
||||
The system uses Flyway to track database changes and allow easy migrations; it is accessible via the following Gradle tasks:
|
||||
|
||||
* `flywayMigrate`
|
||||
* `flywayBaseline`
|
||||
* `flywayRepair`
|
||||
* `flywayClean` (dangerous: it wipes your entire database)
|
||||
|
||||
Refer to the [Flyway documentation](https://documentation.red-gate.com/fd/flyway-documentation-138346877.html) for guidance.
|
||||
It's well documented and these are probably the only four tasks you'll ever need.
|
||||
|
||||
If you are not running the system via docker, you need to provide connection details other than
|
||||
the defaults (TODO: how?).
|
||||
|
||||
The migration files are in [resources/db/migration](resources/db/migration). The file name convention
|
||||
incorporates the project's cal-ver versioning, and the files are applied in lexicographical order.
|
||||
|
||||
VYY_MM_v_nnn__description.sql
|
||||
|
||||
## Central Paths
|
||||
|
||||
* [migrations](resources/db/migration) - Flyway migrations
|
||||
|
||||
## See Also
|
||||
|
||||
* [common/service](../service) implements DatabaseModule, which is from where the services get database connections.
|
144
code/common/db/resources/db/migration/V23_06_0_000__base.sql
Normal file
144
code/common/db/resources/db/migration/V23_06_0_000__base.sql
Normal file
@@ -0,0 +1,144 @@
|
||||
|
||||
CREATE TABLE IF NOT EXISTS EC_DOMAIN (
|
||||
ID INT PRIMARY KEY AUTO_INCREMENT,
|
||||
|
||||
DOMAIN_NAME VARCHAR(255) UNIQUE NOT NULL,
|
||||
DOMAIN_TOP VARCHAR(255) NOT NULL,
|
||||
|
||||
INDEXED INT DEFAULT 0 NOT NULL COMMENT "~number of documents visited / 100",
|
||||
STATE ENUM('ACTIVE', 'EXHAUSTED', 'SPECIAL', 'SOCIAL_MEDIA', 'BLOCKED', 'REDIR', 'ERROR', 'UNKNOWN') NOT NULL DEFAULT 'active' COMMENT "@see EdgeDomainIndexingState",
|
||||
|
||||
RANK DOUBLE,
|
||||
DOMAIN_ALIAS INTEGER,
|
||||
IP VARCHAR(48),
|
||||
|
||||
INDEX_DATE TIMESTAMP DEFAULT NOW(),
|
||||
DISCOVER_DATE TIMESTAMP DEFAULT NOW(),
|
||||
|
||||
IS_ALIVE BOOLEAN AS (STATE='ACTIVE' OR STATE='EXHAUSTED' OR STATE='SPECIAL' OR STATE='SOCIAL_MEDIA') VIRTUAL
|
||||
)
|
||||
CHARACTER SET utf8mb4
|
||||
COLLATE utf8mb4_unicode_ci;
|
||||
|
||||
|
||||
CREATE TABLE IF NOT EXISTS EC_URL (
|
||||
ID INT PRIMARY KEY AUTO_INCREMENT,
|
||||
DOMAIN_ID INT NOT NULL,
|
||||
|
||||
PROTO ENUM('http','https','gemini') NOT NULL COLLATE utf8mb4_unicode_ci,
|
||||
PATH VARCHAR(255) NOT NULL,
|
||||
PORT INT,
|
||||
PARAM VARCHAR(255),
|
||||
|
||||
PATH_HASH BIGINT NOT NULL COMMENT "Hash of PATH for uniqueness check by domain",
|
||||
|
||||
VISITED BOOLEAN NOT NULL DEFAULT FALSE,
|
||||
|
||||
STATE ENUM('ok', 'redirect', 'dead', 'archived', 'disqualified') NOT NULL DEFAULT 'ok' COLLATE utf8mb4_unicode_ci,
|
||||
|
||||
CONSTRAINT CONS UNIQUE (DOMAIN_ID, PATH_HASH),
|
||||
FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE
|
||||
)
|
||||
CHARACTER SET utf8mb4
|
||||
COLLATE utf8mb4_bin;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS EC_PAGE_DATA (
|
||||
ID INT PRIMARY KEY AUTO_INCREMENT,
|
||||
|
||||
TITLE VARCHAR(255) NOT NULL,
|
||||
DESCRIPTION VARCHAR(255) NOT NULL,
|
||||
|
||||
WORDS_TOTAL INTEGER NOT NULL,
|
||||
FORMAT ENUM('PLAIN', 'UNKNOWN', 'HTML123', 'HTML4', 'XHTML', 'HTML5', 'MARKDOWN') NOT NULL,
|
||||
FEATURES INT COMMENT "Bit-encoded feature set of document, @see HtmlFeature" NOT NULL,
|
||||
|
||||
DATA_HASH BIGINT NOT NULL,
|
||||
QUALITY DOUBLE NOT NULL,
|
||||
|
||||
PUB_YEAR SMALLINT,
|
||||
|
||||
FOREIGN KEY (ID) REFERENCES EC_URL(ID) ON DELETE CASCADE
|
||||
)
|
||||
CHARACTER SET utf8mb4
|
||||
COLLATE utf8mb4_unicode_ci;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS EC_DOMAIN_LINK (
|
||||
ID INT PRIMARY KEY AUTO_INCREMENT,
|
||||
SOURCE_DOMAIN_ID INT NOT NULL,
|
||||
DEST_DOMAIN_ID INT NOT NULL,
|
||||
|
||||
CONSTRAINT CONS UNIQUE (SOURCE_DOMAIN_ID, DEST_DOMAIN_ID),
|
||||
|
||||
FOREIGN KEY (SOURCE_DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE,
|
||||
FOREIGN KEY (DEST_DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS DOMAIN_METADATA (
|
||||
ID INT PRIMARY KEY,
|
||||
KNOWN_URLS INT DEFAULT 0,
|
||||
VISITED_URLS INT DEFAULT 0,
|
||||
GOOD_URLS INT DEFAULT 0,
|
||||
|
||||
FOREIGN KEY (ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE
|
||||
);
|
||||
|
||||
CREATE TABLE EC_FEED_URL (
|
||||
URL VARCHAR(255) PRIMARY KEY,
|
||||
DOMAIN_ID INT,
|
||||
|
||||
FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE
|
||||
)
|
||||
CHARACTER SET utf8mb4
|
||||
COLLATE utf8mb4_unicode_ci;
|
||||
|
||||
CREATE OR REPLACE VIEW EC_URL_VIEW AS
|
||||
SELECT
|
||||
CONCAT(EC_URL.PROTO,
|
||||
'://',
|
||||
EC_DOMAIN.DOMAIN_NAME,
|
||||
IF(EC_URL.PORT IS NULL, '', CONCAT(':', EC_URL.PORT)),
|
||||
EC_URL.PATH,
|
||||
IF(EC_URL.PARAM IS NULL, '', CONCAT('?', EC_URL.PARAM))
|
||||
) AS URL,
|
||||
EC_URL.PATH_HASH AS PATH_HASH,
|
||||
EC_URL.PATH AS PATH,
|
||||
EC_DOMAIN.DOMAIN_NAME AS DOMAIN_NAME,
|
||||
EC_DOMAIN.DOMAIN_TOP AS DOMAIN_TOP,
|
||||
EC_URL.ID AS ID,
|
||||
EC_DOMAIN.ID AS DOMAIN_ID,
|
||||
EC_URL.VISITED AS VISITED,
|
||||
EC_PAGE_DATA.QUALITY AS QUALITY,
|
||||
EC_PAGE_DATA.DATA_HASH AS DATA_HASH,
|
||||
EC_PAGE_DATA.TITLE AS TITLE,
|
||||
EC_PAGE_DATA.DESCRIPTION AS DESCRIPTION,
|
||||
EC_PAGE_DATA.WORDS_TOTAL AS WORDS_TOTAL,
|
||||
EC_PAGE_DATA.FORMAT AS FORMAT,
|
||||
EC_PAGE_DATA.FEATURES AS FEATURES,
|
||||
EC_DOMAIN.IP AS IP,
|
||||
EC_URL.STATE AS STATE,
|
||||
EC_DOMAIN.RANK AS RANK,
|
||||
EC_DOMAIN.STATE AS DOMAIN_STATE
|
||||
FROM EC_URL
|
||||
LEFT JOIN EC_PAGE_DATA
|
||||
ON EC_PAGE_DATA.ID = EC_URL.ID
|
||||
INNER JOIN EC_DOMAIN
|
||||
ON EC_URL.DOMAIN_ID = EC_DOMAIN.ID;
|
||||
|
||||
|
||||
CREATE OR REPLACE VIEW EC_RELATED_LINKS_VIEW AS
|
||||
SELECT
|
||||
SOURCE_DOMAIN_ID,
|
||||
SOURCE_DOMAIN.DOMAIN_NAME AS SOURCE_DOMAIN,
|
||||
SOURCE_DOMAIN.DOMAIN_TOP AS SOURCE_TOP_DOMAIN,
|
||||
DEST_DOMAIN_ID,
|
||||
DEST_DOMAIN.DOMAIN_NAME AS DEST_DOMAIN,
|
||||
DEST_DOMAIN.DOMAIN_TOP AS DEST_TOP_DOMAIN
|
||||
FROM EC_DOMAIN_LINK
|
||||
INNER JOIN EC_DOMAIN AS SOURCE_DOMAIN
|
||||
ON SOURCE_DOMAIN.ID=SOURCE_DOMAIN_ID
|
||||
INNER JOIN EC_DOMAIN AS DEST_DOMAIN
|
||||
ON DEST_DOMAIN.ID=DEST_DOMAIN_ID
|
||||
;
|
||||
|
||||
CREATE INDEX IF NOT EXISTS EC_DOMAIN_INDEXED_INDEX ON EC_DOMAIN (INDEXED);
|
||||
CREATE INDEX IF NOT EXISTS EC_DOMAIN_TOP_DOMAIN ON EC_DOMAIN (DOMAIN_TOP);
|
@@ -0,0 +1,8 @@
|
||||
|
||||
CREATE TABLE IF NOT EXISTS EC_DOMAIN_BLACKLIST (
|
||||
ID INT PRIMARY KEY AUTO_INCREMENT,
|
||||
URL_DOMAIN VARCHAR(255) UNIQUE NOT NULL,
|
||||
COMMENT VARCHAR(255) DEFAULT NULL
|
||||
)
|
||||
CHARACTER SET utf8mb4
|
||||
COLLATE utf8mb4_unicode_ci;
|
@@ -0,0 +1,19 @@
|
||||
|
||||
CREATE TABLE IF NOT EXISTS REF_DICTIONARY (
|
||||
TYPE VARCHAR(16),
|
||||
WORD VARCHAR(255),
|
||||
DEFINITION VARCHAR(255)
|
||||
)
|
||||
CHARACTER SET utf8mb4
|
||||
COLLATE utf8mb4_unicode_ci;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS REF_WIKI_ARTICLE (
|
||||
NAME VARCHAR(255) PRIMARY KEY,
|
||||
REF_NAME VARCHAR(255) COMMENT "If this is a redirect, it redirects to this REF_WIKI_ARTICLE.NAME",
|
||||
ENTRY LONGBLOB
|
||||
)
|
||||
ROW_FORMAT=DYNAMIC
|
||||
CHARACTER SET utf8mb4
|
||||
COLLATE utf8mb4_unicode_ci;
|
||||
|
||||
CREATE INDEX IF NOT EXISTS REF_DICTIONARY_WORD ON REF_DICTIONARY (WORD);
|
@@ -0,0 +1,5 @@
|
||||
|
||||
CREATE TABLE CRAWL_QUEUE(
|
||||
DOMAIN_NAME VARCHAR(255) UNIQUE,
|
||||
SOURCE VARCHAR(255)
|
||||
) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
|
@@ -0,0 +1,13 @@
|
||||
CREATE TABLE IF NOT EXISTS DATA_DOMAIN_SCREENSHOT (
|
||||
DOMAIN_NAME VARCHAR(255) PRIMARY KEY,
|
||||
CONTENT_TYPE ENUM ('image/png', 'image/webp', 'image/svg+xml') NOT NULL,
|
||||
DATA LONGBLOB NOT NULL
|
||||
)
|
||||
ROW_FORMAT=DYNAMIC
|
||||
CHARACTER SET utf8mb4
|
||||
COLLATE utf8mb4_unicode_ci;
|
||||
|
||||
CREATE TABLE DATA_DOMAIN_HISTORY (
|
||||
DOMAIN_NAME VARCHAR(255) PRIMARY KEY,
|
||||
SCREENSHOT_DATE DATE DEFAULT NOW()
|
||||
) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
|
@@ -0,0 +1,15 @@
|
||||
CREATE TABLE DOMAIN_COMPLAINT(
|
||||
ID INT PRIMARY KEY AUTO_INCREMENT,
|
||||
DOMAIN_ID INT NOT NULL,
|
||||
|
||||
CATEGORY VARCHAR(255) NOT NULL,
|
||||
DESCRIPTION TEXT,
|
||||
SAMPLE VARCHAR(255),
|
||||
FILE_DATE TIMESTAMP NOT NULL DEFAULT NOW(),
|
||||
|
||||
REVIEWED BOOLEAN AS (REVIEW_DATE > 0) VIRTUAL,
|
||||
DECISION VARCHAR(255),
|
||||
REVIEW_DATE TIMESTAMP,
|
||||
|
||||
FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE
|
||||
) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
|
@@ -0,0 +1,7 @@
|
||||
CREATE TABLE IF NOT EXISTS EC_API_KEY (
|
||||
LICENSE_KEY VARCHAR(255) UNIQUE,
|
||||
LICENSE VARCHAR(255) NOT NULL,
|
||||
NAME VARCHAR(255) NOT NULL,
|
||||
EMAIL VARCHAR(255) NOT NULL,
|
||||
RATE INT DEFAULT 10
|
||||
);
|
@@ -0,0 +1,34 @@
|
||||
|
||||
CREATE TABLE EC_DOMAIN_NEIGHBORS (
|
||||
ID INT PRIMARY KEY AUTO_INCREMENT,
|
||||
DOMAIN_ID INT NOT NULL,
|
||||
NEIGHBOR_ID INT NOT NULL,
|
||||
ADJ_IDX INT NOT NULL,
|
||||
|
||||
CONSTRAINT CONS UNIQUE (DOMAIN_ID, ADJ_IDX),
|
||||
FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE
|
||||
)
|
||||
CHARACTER SET utf8mb4
|
||||
COLLATE utf8mb4_unicode_ci;
|
||||
|
||||
CREATE TABLE EC_DOMAIN_NEIGHBORS_2 (
|
||||
DOMAIN_ID INT NOT NULL,
|
||||
NEIGHBOR_ID INT NOT NULL,
|
||||
RELATEDNESS DOUBLE NOT NULL,
|
||||
|
||||
PRIMARY KEY (DOMAIN_ID, NEIGHBOR_ID),
|
||||
FOREIGN KEY (DOMAIN_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE,
|
||||
FOREIGN KEY (NEIGHBOR_ID) REFERENCES EC_DOMAIN(ID) ON DELETE CASCADE
|
||||
);
|
||||
|
||||
|
||||
CREATE OR REPLACE VIEW EC_NEIGHBORS_VIEW AS
|
||||
SELECT
|
||||
DOM.DOMAIN_NAME AS DOMAIN_NAME,
|
||||
DOM.ID AS DOMAIN_ID,
|
||||
NEIGHBOR.DOMAIN_NAME AS NEIGHBOR_NAME,
|
||||
NEIGHBOR.ID AS NEIGHBOR_ID,
|
||||
ROUND(100 * RELATEDNESS) AS RELATEDNESS
|
||||
FROM EC_DOMAIN_NEIGHBORS_2
|
||||
INNER JOIN EC_DOMAIN DOM ON DOMAIN_ID=DOM.ID
|
||||
INNER JOIN EC_DOMAIN NEIGHBOR ON NEIGHBOR_ID=NEIGHBOR.ID;
|
@@ -0,0 +1,5 @@
|
||||
|
||||
CREATE TABLE IF NOT EXISTS EC_RANDOM_DOMAINS (
|
||||
DOMAIN_ID INT PRIMARY KEY,
|
||||
DOMAIN_SET INT NOT NULL
|
||||
);
|
@@ -0,0 +1,8 @@
|
||||
|
||||
CREATE TABLE SEARCH_NEWS_FEED (
|
||||
ID INT PRIMARY KEY AUTO_INCREMENT,
|
||||
TITLE VARCHAR(255) NOT NULL,
|
||||
LINK VARCHAR(255) UNIQUE NOT NULL,
|
||||
SOURCE VARCHAR(255),
|
||||
LIST_DATE DATE NOT NULL
|
||||
) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
|
@@ -0,0 +1,19 @@
|
||||
CREATE TABLE IF NOT EXISTS DOMAIN_SELECTION_TYPE (
|
||||
ID INT PRIMARY KEY AUTO_INCREMENT,
|
||||
NAME VARCHAR(255) UNIQUE,
|
||||
SOURCE VARCHAR(255) NOT NULL
|
||||
)
|
||||
CHARACTER SET utf8mb4
|
||||
COLLATE utf8mb4_bin;
|
||||
|
||||
CREATE TABLE DOMAIN_SELECTION (
|
||||
DOMAIN_NAME VARCHAR(255) PRIMARY KEY,
|
||||
DOMAIN_TYPE_ID INT,
|
||||
FOREIGN KEY (DOMAIN_TYPE_ID) REFERENCES DOMAIN_SELECTION_TYPE(ID) ON DELETE CASCADE
|
||||
)
|
||||
CHARACTER SET utf8mb4
|
||||
COLLATE utf8mb4_unicode_ci;
|
||||
|
||||
INSERT IGNORE INTO DOMAIN_SELECTION_TYPE(NAME, SOURCE)
|
||||
VALUES ('BLOG', 'https://raw.githubusercontent.com/MarginaliaSearch/PublicData/master/sets/blogs.txt'),
|
||||
('TEST', 'https://downloads.marginalia.nu/domain-list-test.txt');
|
@@ -0,0 +1,27 @@
|
||||
CREATE TABLE IF NOT EXISTS SERVICE_HEARTBEAT (
|
||||
SERVICE_NAME VARCHAR(255) PRIMARY KEY COMMENT "Full name of the service, including node id if applicable, e.g. search-service:0",
|
||||
SERVICE_BASE VARCHAR(255) NOT NULL COMMENT "Base name of the service, e.g. search-service",
|
||||
INSTANCE VARCHAR(255) NOT NULL COMMENT "UUID of the service instance",
|
||||
ALIVE BOOLEAN NOT NULL DEFAULT TRUE COMMENT "Set to false when the service is doing an orderly shutdown",
|
||||
HEARTBEAT_TIME TIMESTAMP(6) NOT NULL DEFAULT CURRENT_TIMESTAMP(6) COMMENT "Service was last seen at this point"
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS PROCESS_HEARTBEAT (
|
||||
PROCESS_NAME VARCHAR(255) PRIMARY KEY COMMENT "Full name of the process, including node id if applicable, e.g. converter:0",
|
||||
PROCESS_BASE VARCHAR(255) NOT NULL COMMENT "Base name of the process, e.g. converter",
|
||||
INSTANCE VARCHAR(255) NOT NULL COMMENT "UUID of the process instance",
|
||||
STATUS ENUM ('STARTING', 'RUNNING', 'STOPPED') NOT NULL DEFAULT 'STARTING' COMMENT "Status of the process",
|
||||
PROGRESS INT NOT NULL DEFAULT 0 COMMENT "Progress of the process",
|
||||
HEARTBEAT_TIME TIMESTAMP(6) NOT NULL DEFAULT CURRENT_TIMESTAMP(6) COMMENT "Process was last seen at this point"
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS SERVICE_EVENTLOG(
|
||||
ID BIGINT AUTO_INCREMENT PRIMARY KEY COMMENT "Unique id",
|
||||
SERVICE_NAME VARCHAR(255) NOT NULL COMMENT "Full name of the service, including node id if applicable, e.g. search-service:0",
|
||||
SERVICE_BASE VARCHAR(255) NOT NULL COMMENT "Base name of the service, e.g. search-service",
|
||||
INSTANCE VARCHAR(255) NOT NULL COMMENT "UUID of the service instance",
|
||||
EVENT_TIME TIMESTAMP(6) NOT NULL DEFAULT CURRENT_TIMESTAMP(6) COMMENT "Event time",
|
||||
EVENT_TYPE VARCHAR(255) NOT NULL COMMENT "Event type",
|
||||
EVENT_MESSAGE VARCHAR(255) NOT NULL COMMENT "Event message"
|
||||
);
|
||||
|
@@ -0,0 +1,21 @@
|
||||
CREATE TABLE IF NOT EXISTS MESSAGE_QUEUE (
|
||||
ID BIGINT AUTO_INCREMENT PRIMARY KEY COMMENT 'Unique id',
|
||||
RELATED_ID BIGINT NOT NULL DEFAULT -1 COMMENT 'Unique id a related message',
|
||||
SENDER_INBOX VARCHAR(255) COMMENT 'Name of the sender inbox',
|
||||
RECIPIENT_INBOX VARCHAR(255) NOT NULL COMMENT 'Name of the recipient inbox',
|
||||
FUNCTION VARCHAR(255) NOT NULL COMMENT 'Which function to run',
|
||||
PAYLOAD TEXT COMMENT 'Message to recipient',
|
||||
-- These fields are used to avoid double processing of messages
|
||||
-- instance marks the unique instance of the party, and the tick marks
|
||||
-- the current polling iteration. Both are necessary.
|
||||
OWNER_INSTANCE VARCHAR(255) COMMENT 'Instance UUID corresponding to the party that has claimed the message',
|
||||
OWNER_TICK BIGINT DEFAULT -1 COMMENT 'Used by recipient to determine which messages it has processed',
|
||||
STATE ENUM('NEW', 'ACK', 'OK', 'ERR', 'DEAD')
|
||||
NOT NULL DEFAULT 'NEW' COMMENT 'Processing state',
|
||||
CREATED_TIME TIMESTAMP(6) NOT NULL DEFAULT CURRENT_TIMESTAMP(6) COMMENT 'Time of creation',
|
||||
UPDATED_TIME TIMESTAMP(6) NOT NULL DEFAULT CURRENT_TIMESTAMP(6) COMMENT 'Time of last update',
|
||||
TTL INT COMMENT 'Time to live in seconds'
|
||||
);
|
||||
|
||||
CREATE INDEX MESSAGE_QUEUE_STATE_IDX ON MESSAGE_QUEUE(STATE);
|
||||
CREATE INDEX MESSAGE_QUEUE_OI_TICK_IDX ON MESSAGE_QUEUE(OWNER_INSTANCE, OWNER_TICK);
|
@@ -0,0 +1,42 @@
|
||||
CREATE TABLE IF NOT EXISTS FILE_STORAGE_BASE (
|
||||
ID BIGINT PRIMARY KEY AUTO_INCREMENT,
|
||||
NAME VARCHAR(255) NOT NULL UNIQUE,
|
||||
PATH VARCHAR(255) NOT NULL UNIQUE COMMENT 'The path to the storage base',
|
||||
TYPE ENUM ('SSD_INDEX', 'SSD_WORK', 'SLOW', 'BACKUP') NOT NULL,
|
||||
PERMIT_TEMP BOOLEAN NOT NULL DEFAULT FALSE COMMENT 'If true, the storage can be used for temporary files'
|
||||
)
|
||||
CHARACTER SET utf8mb4
|
||||
COLLATE utf8mb4_bin;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS FILE_STORAGE (
|
||||
ID BIGINT PRIMARY KEY AUTO_INCREMENT,
|
||||
BASE_ID BIGINT NOT NULL,
|
||||
PATH VARCHAR(255) NOT NULL COMMENT 'The path to the storage relative to the base',
|
||||
DESCRIPTION VARCHAR(255) NOT NULL,
|
||||
TYPE ENUM ('CRAWL_SPEC', 'CRAWL_DATA', 'PROCESSED_DATA', 'INDEX_STAGING', 'LEXICON_STAGING', 'INDEX_LIVE', 'LEXICON_LIVE', 'SEARCH_SETS', 'BACKUP', 'EXPORT') NOT NULL,
|
||||
DO_PURGE BOOLEAN NOT NULL DEFAULT FALSE COMMENT 'If true, the storage may be cleaned',
|
||||
CREATE_DATE TIMESTAMP(6) NOT NULL DEFAULT CURRENT_TIMESTAMP(6),
|
||||
CONSTRAINT CONS UNIQUE (BASE_ID, PATH),
|
||||
FOREIGN KEY (BASE_ID) REFERENCES FILE_STORAGE_BASE(ID) ON DELETE CASCADE
|
||||
)
|
||||
CHARACTER SET utf8mb4
|
||||
COLLATE utf8mb4_bin;
|
||||
|
||||
CREATE TABLE IF NOT EXISTS FILE_STORAGE_RELATION (
|
||||
SOURCE_ID BIGINT NOT NULL,
|
||||
TARGET_ID BIGINT NOT NULL,
|
||||
CONSTRAINT CONS UNIQUE (SOURCE_ID, TARGET_ID),
|
||||
FOREIGN KEY (SOURCE_ID) REFERENCES FILE_STORAGE(ID) ON DELETE CASCADE,
|
||||
FOREIGN KEY (TARGET_ID) REFERENCES FILE_STORAGE(ID) ON DELETE CASCADE
|
||||
);
|
||||
|
||||
CREATE VIEW FILE_STORAGE_VIEW
|
||||
AS SELECT
|
||||
CONCAT(BASE.PATH, '/', STORAGE.PATH) AS PATH,
|
||||
STORAGE.TYPE AS TYPE,
|
||||
DESCRIPTION AS DESCRIPTION,
|
||||
CREATE_DATE AS CREATE_DATE,
|
||||
STORAGE.ID AS ID,
|
||||
BASE.ID AS BASE_ID
|
||||
FROM FILE_STORAGE STORAGE
|
||||
INNER JOIN FILE_STORAGE_BASE BASE ON STORAGE.BASE_ID=BASE.ID;
|
@@ -0,0 +1,28 @@
|
||||
INSERT IGNORE INTO FILE_STORAGE_BASE(NAME, PATH, TYPE, PERMIT_TEMP)
|
||||
VALUES
|
||||
('Index Storage', '/vol', 'SSD_INDEX', false),
|
||||
('Data Storage', '/samples', 'SLOW', true);
|
||||
|
||||
INSERT IGNORE INTO FILE_STORAGE(BASE_ID, PATH, DESCRIPTION, TYPE)
|
||||
SELECT ID, 'iw', "Index Staging Area", 'INDEX_STAGING'
|
||||
FROM FILE_STORAGE_BASE WHERE NAME='Index Storage';
|
||||
|
||||
INSERT IGNORE INTO FILE_STORAGE(BASE_ID, PATH, DESCRIPTION, TYPE)
|
||||
SELECT ID, 'ir', "Index Live Area", 'INDEX_LIVE'
|
||||
FROM FILE_STORAGE_BASE WHERE NAME='Index Storage';
|
||||
|
||||
INSERT IGNORE INTO FILE_STORAGE(BASE_ID, PATH, DESCRIPTION, TYPE)
|
||||
SELECT ID, 'lw', "Lexicon Staging Area", 'LEXICON_STAGING'
|
||||
FROM FILE_STORAGE_BASE WHERE NAME='Index Storage';
|
||||
|
||||
INSERT IGNORE INTO FILE_STORAGE(BASE_ID, PATH, DESCRIPTION, TYPE)
|
||||
SELECT ID, 'lr', "Lexicon Live Area", 'LEXICON_LIVE'
|
||||
FROM FILE_STORAGE_BASE WHERE NAME='Index Storage';
|
||||
|
||||
INSERT IGNORE INTO FILE_STORAGE(BASE_ID, PATH, DESCRIPTION, TYPE)
|
||||
SELECT ID, 'ss', "Search Sets", 'SEARCH_SETS'
|
||||
FROM FILE_STORAGE_BASE WHERE NAME='Index Storage';
|
||||
|
||||
-- Registers an 'export' storage area ("Exported Data") under every storage base whose TYPE is 'EXPORT'.
-- NOTE(review): FILE_STORAGE_BASE.TYPE is declared as ENUM ('SSD_INDEX', 'SSD_WORK', 'SLOW', 'BACKUP')
-- and the preceding seed data only inserts 'SSD_INDEX' and 'SLOW' bases, so this SELECT appears to
-- match no rows and the statement inserts nothing -- confirm whether a NAME filter was intended.
INSERT IGNORE INTO FILE_STORAGE(BASE_ID, PATH, DESCRIPTION, TYPE)
|
||||
SELECT ID, 'export', "Exported Data", 'EXPORT'
|
||||
FROM FILE_STORAGE_BASE WHERE TYPE='EXPORT';
|
@@ -0,0 +1,7 @@
|
||||
INSERT INTO MESSAGE_QUEUE(RECIPIENT_INBOX,FUNCTION,PAYLOAD) VALUES
|
||||
('fsm:converter_monitor','INITIAL',''),
|
||||
('fsm:loader_monitor','INITIAL',''),
|
||||
('fsm:crawler_monitor','INITIAL',''),
|
||||
('fsm:message_queue_monitor','INITIAL',''),
|
||||
('fsm:process_liveness_monitor','INITIAL',''),
|
||||
('fsm:file_storage_monitor','INITIAL','');
|
@@ -0,0 +1,10 @@
|
||||
CREATE TABLE IF NOT EXISTS TASK_HEARTBEAT (
|
||||
TASK_NAME VARCHAR(255) PRIMARY KEY COMMENT "Full name of the task, including node id if applicable, e.g. reconvert:0",
|
||||
TASK_BASE VARCHAR(255) NOT NULL COMMENT "Base name of the task, e.g. reconvert",
|
||||
INSTANCE VARCHAR(255) NOT NULL COMMENT "UUID of the task instance",
|
||||
SERVICE_INSTANCE VARCHAR(255) NOT NULL COMMENT "UUID of the parent service",
|
||||
STATUS ENUM ('STARTING', 'RUNNING', 'STOPPED') NOT NULL DEFAULT 'STARTING' COMMENT "Status of the task",
|
||||
PROGRESS INT NOT NULL DEFAULT 0 COMMENT "Progress of the task",
|
||||
STAGE_NAME VARCHAR(255) DEFAULT "",
|
||||
HEARTBEAT_TIME TIMESTAMP(6) NOT NULL DEFAULT CURRENT_TIMESTAMP(6) COMMENT "Task was last seen at this point"
|
||||
);
|
@@ -0,0 +1,2 @@
|
||||
CREATE INDEX IF NOT EXISTS SERVICE_EVENTLOG__EVENT_TYPE_IDX ON SERVICE_EVENTLOG (EVENT_TYPE);
|
||||
CREATE INDEX IF NOT EXISTS SERVICE_EVENTLOG__SERVICE_NAME_IDX ON SERVICE_EVENTLOG (SERVICE_NAME);
|
@@ -0,0 +1,9 @@
|
||||
ALTER TABLE FILE_STORAGE MODIFY COLUMN TYPE ENUM ('CRAWL_SPEC', 'CRAWL_DATA', 'PROCESSED_DATA', 'INDEX_STAGING', 'LEXICON_STAGING', 'INDEX_LIVE', 'LEXICON_LIVE', 'SEARCH_SETS', 'BACKUP', 'EXPORT', 'LINKDB_LIVE', 'LINKDB_STAGING') NOT NULL;
|
||||
|
||||
INSERT IGNORE INTO FILE_STORAGE(BASE_ID, PATH, DESCRIPTION, TYPE)
|
||||
SELECT ID, 'ldbr', "Linkdb Current", 'LINKDB_LIVE'
|
||||
FROM FILE_STORAGE_BASE WHERE NAME='Index Storage';
|
||||
|
||||
INSERT IGNORE INTO FILE_STORAGE(BASE_ID, PATH, DESCRIPTION, TYPE)
|
||||
SELECT ID, 'ldbw', "Linkdb Staging Area", 'LINKDB_STAGING'
|
||||
FROM FILE_STORAGE_BASE WHERE NAME='Index Storage';
|
@@ -0,0 +1,3 @@
|
||||
-- Removes the URL/page-data schema: the view is dropped first, then the two tables it selects from.
-- EC_PAGE_DATA is dropped before EC_URL.
DROP VIEW EC_URL_VIEW;
|
||||
DROP TABLE EC_PAGE_DATA;
|
||||
DROP TABLE EC_URL;
|
@@ -0,0 +1,3 @@
|
||||
INSERT IGNORE INTO FILE_STORAGE_BASE(NAME, PATH, TYPE, PERMIT_TEMP)
|
||||
VALUES
|
||||
('Backup Storage', '/backup', 'BACKUP', true);
|
@@ -0,0 +1 @@
|
||||
-- Data cleanup: removes every FILE_STORAGE row of type LEXICON_STAGING or LEXICON_LIVE.
DELETE FROM FILE_STORAGE WHERE TYPE IN ('LEXICON_STAGING', 'LEXICON_LIVE');
|
@@ -0,0 +1,21 @@
|
||||
ALTER TABLE FILE_STORAGE_BASE MODIFY COLUMN NAME VARCHAR(255) NOT NULL;
|
||||
ALTER TABLE FILE_STORAGE_BASE MODIFY COLUMN PATH VARCHAR(255) NOT NULL;
|
||||
DROP INDEX PATH ON FILE_STORAGE_BASE;
|
||||
DROP INDEX NAME ON FILE_STORAGE_BASE;
|
||||
ALTER TABLE FILE_STORAGE_BASE ADD COLUMN NODE INT NOT NULL DEFAULT -1;
|
||||
CREATE UNIQUE INDEX FILE_STORAGE_BASE__NODE_NAME ON FILE_STORAGE_BASE(NODE, NAME);
|
||||
CREATE UNIQUE INDEX FILE_STORAGE_BASE__NODE_PATH ON FILE_STORAGE_BASE(NODE, PATH);
|
||||
|
||||
|
||||
DROP VIEW FILE_STORAGE_VIEW;
|
||||
CREATE VIEW FILE_STORAGE_VIEW
|
||||
AS SELECT
|
||||
CONCAT(BASE.PATH, '/', STORAGE.PATH) AS PATH,
|
||||
STORAGE.TYPE AS TYPE,
|
||||
NODE AS NODE,
|
||||
DESCRIPTION AS DESCRIPTION,
|
||||
CREATE_DATE AS CREATE_DATE,
|
||||
STORAGE.ID AS ID,
|
||||
BASE.ID AS BASE_ID
|
||||
FROM FILE_STORAGE STORAGE
|
||||
INNER JOIN FILE_STORAGE_BASE BASE ON STORAGE.BASE_ID=BASE.ID;
|
@@ -0,0 +1,3 @@
|
||||
ALTER TABLE TASK_HEARTBEAT ADD COLUMN NODE INT NOT NULL DEFAULT -1;
|
||||
ALTER TABLE PROCESS_HEARTBEAT ADD COLUMN NODE INT NOT NULL DEFAULT -1;
|
||||
ALTER TABLE SERVICE_HEARTBEAT ADD COLUMN NODE INT NOT NULL DEFAULT -1;
|
@@ -0,0 +1,17 @@
|
||||
ALTER TABLE FILE_STORAGE ADD COLUMN STATE VARCHAR(255) NOT NULL DEFAULT '';
|
||||
ALTER TABLE FILE_STORAGE DROP COLUMN DO_PURGE;
|
||||
|
||||
DROP VIEW FILE_STORAGE_VIEW;
|
||||
|
||||
CREATE VIEW FILE_STORAGE_VIEW
|
||||
AS SELECT
|
||||
CONCAT(BASE.PATH, '/', STORAGE.PATH) AS PATH,
|
||||
STORAGE.TYPE AS TYPE,
|
||||
STATE AS STATE,
|
||||
NODE AS NODE,
|
||||
DESCRIPTION AS DESCRIPTION,
|
||||
CREATE_DATE AS CREATE_DATE,
|
||||
STORAGE.ID AS ID,
|
||||
BASE.ID AS BASE_ID
|
||||
FROM FILE_STORAGE STORAGE
|
||||
INNER JOIN FILE_STORAGE_BASE BASE ON STORAGE.BASE_ID=BASE.ID;
|
@@ -0,0 +1,8 @@
|
||||
CREATE TABLE NODE_CONFIGURATION (
|
||||
ID INT PRIMARY KEY,
|
||||
DESCRIPTION VARCHAR(255),
|
||||
ACCEPT_QUERIES BOOLEAN,
|
||||
AUTO_CLEAN BOOLEAN DEFAULT TRUE,
|
||||
PRECESSION BOOLEAN DEFAULT TRUE,
|
||||
DISABLED BOOLEAN DEFAULT FALSE
|
||||
);
|
@@ -0,0 +1,10 @@
|
||||
ALTER TABLE FILE_STORAGE_BASE DROP COLUMN PERMIT_TEMP;
|
||||
ALTER TABLE FILE_STORAGE_BASE ADD COLUMN TYPE_NEW VARCHAR(255) NOT NULL;
|
||||
|
||||
UPDATE FILE_STORAGE_BASE SET TYPE_NEW = 'CURRENT' WHERE TYPE='SSD_INDEX';
|
||||
UPDATE FILE_STORAGE_BASE SET TYPE_NEW = 'WORK' WHERE TYPE='SSD_WORK';
|
||||
UPDATE FILE_STORAGE_BASE SET TYPE_NEW = 'STORAGE' WHERE TYPE='SLOW';
|
||||
UPDATE FILE_STORAGE_BASE SET TYPE_NEW = 'BACKUP' WHERE TYPE='BACKUP';
|
||||
|
||||
ALTER TABLE FILE_STORAGE_BASE DROP COLUMN TYPE;
|
||||
ALTER TABLE FILE_STORAGE_BASE CHANGE COLUMN TYPE_NEW TYPE VARCHAR(255) NOT NULL;
|
@@ -0,0 +1 @@
|
||||
-- Marks every message that is still in state 'NEW' as 'DEAD'.
UPDATE MESSAGE_QUEUE SET STATE='DEAD' WHERE STATE='NEW';
|
@@ -0,0 +1 @@
|
||||
-- Data cleanup: removes every FILE_STORAGE row of the index, search-set and linkdb storage types.
DELETE FROM FILE_STORAGE WHERE TYPE IN ('INDEX_STAGING', 'INDEX_LIVE', 'SEARCH_SETS', 'LINKDB_LIVE', 'LINKDB_STAGING');
|
@@ -0,0 +1 @@
|
||||
-- Adds the NODE_AFFINITY column to EC_DOMAIN.
-- NOTE(review): the column is NOT NULL without an explicit DEFAULT; existing rows presumably
-- receive the server's implicit default for INT (0) -- confirm that this is the intended
-- "unassigned" value.
ALTER TABLE EC_DOMAIN ADD COLUMN NODE_AFFINITY INT NOT NULL;
|
@@ -0,0 +1,9 @@
|
||||
-- Redefines EC_DOMAIN_LINK.ID as BIGINT NOT NULL AUTO_INCREMENT.
-- NOTE(review): the schema name WMSA_prod is hard-coded, unlike most other statements in these
-- scripts which use unqualified table names; this only works when the database has that name.
ALTER TABLE WMSA_prod.EC_DOMAIN_LINK
|
||||
MODIFY COLUMN ID BIGINT NOT NULL AUTO_INCREMENT;
|
||||
|
||||
-- Defines the stored procedure PURGE_LINKS_TABLE(nodeId).
-- It deletes every EC_DOMAIN_LINK row whose source domain (joined on
-- EC_DOMAIN_LINK.SOURCE_DOMAIN_ID = EC_DOMAIN.ID) has NODE_AFFINITY equal to nodeId.
-- The statement delimiter is switched to $$ so the ';' inside the body does not end the
-- CREATE statement, and restored afterwards.
-- NOTE(review): EC_DOMAIN is referenced through the hard-coded schema WMSA_prod.
DELIMITER $$
|
||||
CREATE OR REPLACE PROCEDURE PURGE_LINKS_TABLE (IN nodeId INT)
|
||||
BEGIN
|
||||
DELETE EC_DOMAIN_LINK FROM EC_DOMAIN_LINK INNER JOIN WMSA_prod.EC_DOMAIN ON EC_DOMAIN_LINK.SOURCE_DOMAIN_ID = EC_DOMAIN.ID WHERE NODE_AFFINITY = nodeId;
|
||||
END$$
|
||||
DELIMITER ;
|
@@ -0,0 +1 @@
|
||||
-- Adds the boolean KEEP_WARCS column (default FALSE) to NODE_CONFIGURATION.
-- NOTE(review): the schema name WMSA_prod is hard-coded in the table reference.
ALTER TABLE WMSA_prod.NODE_CONFIGURATION ADD COLUMN KEEP_WARCS BOOLEAN DEFAULT FALSE;
|
@@ -0,0 +1,12 @@
|
||||
|
||||
CREATE TABLE IF NOT EXISTS CONF_DOMAIN_RANKING_SET (
|
||||
NAME VARCHAR(255) PRIMARY KEY COLLATE utf8mb4_unicode_ci,
|
||||
DESCRIPTION VARCHAR(255) NOT NULL,
|
||||
ALGORITHM VARCHAR(255) NOT NULL,
|
||||
DEPTH INT NOT NULL,
|
||||
DEFINITION LONGTEXT NOT NULL
|
||||
) CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
|
||||
|
||||
INSERT IGNORE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION) VALUES ('NONE', 'Reserved: No Ranking Algorithm', 'SPECIAL', 50000, '');
|
||||
INSERT IGNORE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION) VALUES ('BLOGS', 'Reserved: Blogs Set', 'SPECIAL', 50000, '');
|
||||
INSERT IGNORE INTO CONF_DOMAIN_RANKING_SET(NAME, DESCRIPTION, ALGORITHM, DEPTH, DEFINITION) VALUES ('RANK', 'Reserved: Main Domain Ranking', 'SPECIAL', 50000, '');
|
@@ -0,0 +1 @@
|
||||
-- Adds the AUDIT_RELATED_ID column (default -1) to MESSAGE_QUEUE.
-- NOTE(review): in MariaDB/MySQL the type keyword LONG is a synonym for MEDIUMTEXT, not a 64-bit
-- integer. The sibling column RELATED_ID is declared BIGINT, so BIGINT was presumably intended
-- here as well -- TODO confirm. If so, correct it in a follow-up migration rather than by editing
-- this script, since it has presumably already been applied to existing databases.
ALTER TABLE MESSAGE_QUEUE ADD COLUMN AUDIT_RELATED_ID LONG NOT NULL DEFAULT -1 COMMENT 'To be applied to any new messages created while handling a message';
|
@@ -0,0 +1 @@
|
||||
DROP TABLE EC_DOMAIN_LINK;
|
@@ -0,0 +1 @@
|
||||
ALTER TABLE CONF_DOMAIN_RANKING_SET DROP COLUMN ALGORITHM;
|
@@ -0,0 +1 @@
|
||||
-- Adds the NODE_PROFILE column to NODE_CONFIGURATION; rows default to the profile 'MIXED'.
-- NOTE(review): the schema name WMSA_prod is hard-coded in the table reference.
ALTER TABLE WMSA_prod.NODE_CONFIGURATION ADD COLUMN NODE_PROFILE VARCHAR(255) DEFAULT 'MIXED';
|
@@ -0,0 +1,91 @@
|
||||
package nu.marginalia.db;
|
||||
|
||||
import com.zaxxer.hikari.HikariConfig;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import nu.marginalia.test.TestMigrationLoader;
|
||||
import org.junit.jupiter.api.*;
|
||||
import org.testcontainers.containers.MariaDBContainer;
|
||||
import org.testcontainers.junit.jupiter.Container;
|
||||
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
@Testcontainers
|
||||
@Tag("slow")
|
||||
class DomainRankingSetsServiceTest {
|
||||
|
||||
@Container
|
||||
static MariaDBContainer<?> mariaDBContainer = new MariaDBContainer<>("mariadb")
|
||||
.withDatabaseName("WMSA_prod")
|
||||
.withUsername("wmsa")
|
||||
.withPassword("wmsa")
|
||||
.withNetworkAliases("mariadb");
|
||||
|
||||
static HikariDataSource dataSource;
|
||||
|
||||
@BeforeAll
|
||||
public static void setup() {
|
||||
HikariConfig config = new HikariConfig();
|
||||
config.setJdbcUrl(mariaDBContainer.getJdbcUrl());
|
||||
config.setUsername("wmsa");
|
||||
config.setPassword("wmsa");
|
||||
|
||||
dataSource = new HikariDataSource(config);
|
||||
|
||||
TestMigrationLoader.flywayMigration(dataSource);
|
||||
|
||||
// The migration SQL will insert a few default values, we want to remove them
|
||||
wipeDomainRankingSets(dataSource);
|
||||
}
|
||||
|
||||
@AfterEach
|
||||
public void tearDown() {
|
||||
wipeDomainRankingSets(dataSource);
|
||||
}
|
||||
|
||||
@AfterAll
|
||||
static void tearDownAll() {
|
||||
dataSource.close();
|
||||
mariaDBContainer.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testScenarios() throws Exception {
|
||||
var service = new DomainRankingSetsService(dataSource);
|
||||
|
||||
var newValue = new DomainRankingSetsService.DomainRankingSet(
|
||||
"test",
|
||||
"Test domain set",
|
||||
10,
|
||||
"test\\.nu"
|
||||
);
|
||||
var newValue2 = new DomainRankingSetsService.DomainRankingSet(
|
||||
"test2",
|
||||
"Test domain set 2",
|
||||
20,
|
||||
"test\\.nu 2"
|
||||
);
|
||||
service.upsert(newValue);
|
||||
service.upsert(newValue2);
|
||||
assertEquals(newValue, service.get("test").orElseThrow());
|
||||
|
||||
var allValues = service.getAll();
|
||||
assertEquals(2, allValues.size());
|
||||
assertTrue(allValues.contains(newValue));
|
||||
assertTrue(allValues.contains(newValue2));
|
||||
|
||||
service.delete(newValue);
|
||||
assertFalse(service.get("test").isPresent());
|
||||
|
||||
service.delete(newValue2);
|
||||
assertFalse(service.get("test2").isPresent());
|
||||
|
||||
allValues = service.getAll();
|
||||
assertEquals(0, allValues.size());
|
||||
}
|
||||
|
||||
private static void wipeDomainRankingSets(HikariDataSource dataSource) {
|
||||
var service = new DomainRankingSetsService(dataSource);
|
||||
service.getAll().forEach(service::delete);
|
||||
}
|
||||
}
|
73
code/common/db/test/nu/marginalia/db/DomainTypesTest.java
Normal file
73
code/common/db/test/nu/marginalia/db/DomainTypesTest.java
Normal file
@@ -0,0 +1,73 @@
|
||||
package nu.marginalia.db;
|
||||
|
||||
import com.google.common.collect.Sets;
|
||||
import com.zaxxer.hikari.HikariConfig;
|
||||
import com.zaxxer.hikari.HikariDataSource;
|
||||
import nu.marginalia.test.TestMigrationLoader;
|
||||
import org.junit.jupiter.api.AfterAll;
|
||||
import org.junit.jupiter.api.BeforeAll;
|
||||
import org.junit.jupiter.api.Tag;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.testcontainers.containers.MariaDBContainer;
|
||||
import org.testcontainers.junit.jupiter.Container;
|
||||
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.sql.SQLException;
|
||||
import java.util.HashSet;
|
||||
import java.util.Set;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
|
||||
@Tag("slow")
|
||||
@Testcontainers
|
||||
public class DomainTypesTest {
|
||||
@Container
|
||||
static MariaDBContainer<?> mariaDBContainer = new MariaDBContainer<>("mariadb")
|
||||
.withDatabaseName("WMSA_prod")
|
||||
.withUsername("wmsa")
|
||||
.withPassword("wmsa")
|
||||
.withNetworkAliases("mariadb");
|
||||
|
||||
static HikariDataSource dataSource;
|
||||
static DomainTypes domainTypes;
|
||||
|
||||
@BeforeAll
|
||||
public static void setup() {
|
||||
HikariConfig config = new HikariConfig();
|
||||
config.setJdbcUrl(mariaDBContainer.getJdbcUrl());
|
||||
config.setUsername("wmsa");
|
||||
config.setPassword("wmsa");
|
||||
|
||||
dataSource = new HikariDataSource(config);
|
||||
TestMigrationLoader.flywayMigration(dataSource);
|
||||
|
||||
domainTypes = new DomainTypes(dataSource);
|
||||
}
|
||||
|
||||
@AfterAll
|
||||
public static void teardown() {
|
||||
dataSource.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void reloadDomainsList() throws SQLException, IOException {
|
||||
domainTypes.reloadDomainsList(DomainTypes.Type.TEST);
|
||||
|
||||
var downloadedDomains = new HashSet<>(domainTypes.getAllDomainsByType(DomainTypes.Type.TEST));
|
||||
|
||||
var expectedDomains = Set.of("www.marginalia.nu", "search.marginalia.nu", "docs.marginalia.nu",
|
||||
"encyclopedia.marginalia.nu", "memex.marginalia.nu");
|
||||
|
||||
assertEquals(expectedDomains.size(), downloadedDomains.size());
|
||||
assertEquals(Set.of(), Sets.symmetricDifference(expectedDomains, downloadedDomains));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void configure() throws SQLException {
|
||||
assertEquals("", domainTypes.getUrlForSelection(DomainTypes.Type.CRAWL));
|
||||
domainTypes.updateUrlForSelection(DomainTypes.Type.CRAWL, "test");
|
||||
assertEquals("test", domainTypes.getUrlForSelection(DomainTypes.Type.CRAWL));
|
||||
}
|
||||
|
||||
}
|
49
code/common/linkdb/build.gradle
Normal file
49
code/common/linkdb/build.gradle
Normal file
@@ -0,0 +1,49 @@
|
||||
plugins {
|
||||
id 'java'
|
||||
|
||||
id 'jvm-test-suite'
|
||||
}
|
||||
|
||||
java {
|
||||
toolchain {
|
||||
languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
|
||||
}
|
||||
}
|
||||
|
||||
configurations {
|
||||
flywayMigration.extendsFrom(implementation)
|
||||
}
|
||||
|
||||
apply from: "$rootProject.projectDir/srcsets.gradle"
|
||||
|
||||
dependencies {
|
||||
implementation project(':code:common:model')
|
||||
implementation project(':code:common:service')
|
||||
|
||||
implementation libs.bundles.slf4j
|
||||
|
||||
implementation libs.guava
|
||||
implementation dependencies.create(libs.guice.get()) {
|
||||
exclude group: 'com.google.guava'
|
||||
}
|
||||
implementation libs.bundles.gson
|
||||
|
||||
implementation libs.notnull
|
||||
implementation libs.bundles.mariadb
|
||||
|
||||
implementation libs.sqlite
|
||||
implementation libs.commons.lang3
|
||||
|
||||
implementation libs.trove
|
||||
|
||||
testImplementation libs.bundles.slf4j.test
|
||||
testImplementation libs.bundles.junit
|
||||
testImplementation libs.mockito
|
||||
|
||||
testImplementation platform('org.testcontainers:testcontainers-bom:1.17.4')
|
||||
testImplementation libs.commons.codec
|
||||
testImplementation 'org.testcontainers:mariadb:1.17.4'
|
||||
testImplementation 'org.testcontainers:junit-jupiter:1.17.4'
|
||||
testImplementation project(':code:libraries:test-helpers')
|
||||
}
|
||||
|
@@ -0,0 +1,7 @@
|
||||
package nu.marginalia.linkdb;
|
||||
|
||||
/** File names used for the link database files. */
public class LinkdbFileNames {
    /** File name "links.db"; the constant's name marks it as deprecated. */
    public static final String DEPRECATED_LINKDB_FILE_NAME = "links.db";

    /** File name of the document database. */
    public static final String DOCDB_FILE_NAME = "documents.db";

    /** File name of the domain links data file. */
    public static final String DOMAIN_LINKS_FILE_NAME = "domain-links.dat";
}
|
@@ -0,0 +1,135 @@
|
||||
package nu.marginalia.linkdb.docs;
|
||||
|
||||
import com.google.inject.Inject;
|
||||
import com.google.inject.Singleton;
|
||||
import com.google.inject.name.Named;
|
||||
import gnu.trove.list.TLongList;
|
||||
import nu.marginalia.linkdb.model.DocdbUrlDetail;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
import nu.marginalia.model.id.UrlIdCodec;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URISyntaxException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import java.nio.file.StandardCopyOption;
|
||||
import java.sql.Connection;
|
||||
import java.sql.DriverManager;
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/** Reads the document database, which is a SQLite database
|
||||
* containing the URLs and metadata of the documents in the
|
||||
* index.
|
||||
* <p></p>
|
||||
* The database is created by the DocumentDbWriter class.
|
||||
* */
|
||||
@Singleton
|
||||
public class DocumentDbReader {
|
||||
private final Path dbFile;
|
||||
private volatile Connection connection;
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
@Inject
|
||||
public DocumentDbReader(@Named("docdb-file") Path dbFile) throws SQLException {
|
||||
this.dbFile = dbFile;
|
||||
|
||||
if (Files.exists(dbFile)) {
|
||||
connection = createConnection();
|
||||
}
|
||||
else {
|
||||
logger.warn("No docdb file {}", dbFile);
|
||||
}
|
||||
}
|
||||
|
||||
private Connection createConnection() throws SQLException {
|
||||
try {
|
||||
String connStr = "jdbc:sqlite:" + dbFile.toString();
|
||||
return DriverManager.getConnection(connStr);
|
||||
}
|
||||
catch (SQLException ex) {
|
||||
logger.error("Failed to connect to link database " + dbFile, ex);
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/** Switches the input database file to a new file.
|
||||
* <p></p>
|
||||
* This is used to switch over to a new database file
|
||||
* when the index is re-indexed.
|
||||
* */
|
||||
public void switchInput(Path newDbFile) throws IOException, SQLException {
|
||||
if (!Files.isRegularFile(newDbFile)) {
|
||||
logger.error("Source is not a file, refusing switch-over {}", newDbFile);
|
||||
return;
|
||||
}
|
||||
|
||||
if (connection != null) {
|
||||
connection.close();
|
||||
}
|
||||
|
||||
logger.info("Moving {} to {}", newDbFile, dbFile);
|
||||
|
||||
Files.move(newDbFile, dbFile, StandardCopyOption.REPLACE_EXISTING);
|
||||
|
||||
connection = createConnection();
|
||||
}
|
||||
|
||||
/** Re-establishes the connection, useful in tests and not
|
||||
* much else */
|
||||
public void reconnect() throws SQLException {
|
||||
if (connection != null)
|
||||
connection.close();
|
||||
|
||||
connection = createConnection();
|
||||
}
|
||||
|
||||
/** Returns the URL details for the given document ids.
|
||||
* <p></p>
|
||||
* This is used to get the URL details for the search
|
||||
* results.
|
||||
* */
|
||||
public List<DocdbUrlDetail> getUrlDetails(TLongList ids) throws SQLException {
|
||||
List<DocdbUrlDetail> ret = new ArrayList<>(ids.size());
|
||||
|
||||
if (connection == null ||
|
||||
connection.isClosed())
|
||||
{
|
||||
throw new RuntimeException("URL query temporarily unavailable due to database switch");
|
||||
}
|
||||
|
||||
try (var stmt = connection.prepareStatement("""
|
||||
SELECT ID, URL, TITLE, DESCRIPTION, WORDS_TOTAL, FORMAT, FEATURES, DATA_HASH, QUALITY, PUB_YEAR
|
||||
FROM DOCUMENT WHERE ID = ?
|
||||
""")) {
|
||||
for (int i = 0; i < ids.size(); i++) {
|
||||
long id = ids.get(i);
|
||||
stmt.setLong(1, id);
|
||||
var rs = stmt.executeQuery();
|
||||
if (rs.next()) {
|
||||
var url = new EdgeUrl(rs.getString("URL"));
|
||||
ret.add(new DocdbUrlDetail(
|
||||
rs.getLong("ID"),
|
||||
url,
|
||||
rs.getString("TITLE"),
|
||||
rs.getString("DESCRIPTION"),
|
||||
rs.getDouble("QUALITY"),
|
||||
rs.getString("FORMAT"),
|
||||
rs.getInt("FEATURES"),
|
||||
rs.getInt("PUB_YEAR"),
|
||||
rs.getLong("DATA_HASH"),
|
||||
rs.getInt("WORDS_TOTAL")
|
||||
));
|
||||
}
|
||||
}
|
||||
} catch (URISyntaxException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
@@ -0,0 +1,83 @@
|
||||
package nu.marginalia.linkdb.docs;
|
||||
|
||||
import nu.marginalia.linkdb.model.DocdbUrlDetail;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
import java.sql.Connection;
|
||||
import java.sql.DriverManager;
|
||||
import java.sql.SQLException;
|
||||
import java.util.List;
|
||||
|
||||
/** Writes the document database, which is a SQLite database
|
||||
* containing the URLs and metadata of the documents in the
|
||||
* index.
|
||||
* */
|
||||
public class DocumentDbWriter {
|
||||
|
||||
private final Connection connection;
|
||||
|
||||
public DocumentDbWriter(Path outputFile) throws SQLException {
|
||||
String connStr = "jdbc:sqlite:" + outputFile.toString();
|
||||
connection = DriverManager.getConnection(connStr);
|
||||
|
||||
try (var stream = ClassLoader.getSystemResourceAsStream("db/docdb-document.sql");
|
||||
var stmt = connection.createStatement()
|
||||
) {
|
||||
var sql = new String(stream.readAllBytes());
|
||||
stmt.executeUpdate(sql);
|
||||
|
||||
// Disable synchronous writing as this is a one-off operation with no recovery
|
||||
stmt.execute("PRAGMA synchronous = OFF");
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
public void add(DocdbUrlDetail docdbUrlDetail) throws SQLException {
|
||||
add(List.of(docdbUrlDetail));
|
||||
}
|
||||
|
||||
public void add(List<DocdbUrlDetail> docdbUrlDetail) throws SQLException {
|
||||
|
||||
try (var stmt = connection.prepareStatement("""
|
||||
INSERT OR IGNORE INTO DOCUMENT(ID, URL, TITLE, DESCRIPTION, WORDS_TOTAL, FORMAT, FEATURES, DATA_HASH, QUALITY, PUB_YEAR)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
""")) {
|
||||
|
||||
int i = 0;
|
||||
for (var document : docdbUrlDetail) {
|
||||
var url = document.url();
|
||||
|
||||
stmt.setLong(1, document.urlId());
|
||||
stmt.setString(2, url.toString());
|
||||
|
||||
stmt.setString(3, document.title());
|
||||
stmt.setString(4, document.description());
|
||||
stmt.setInt(5, document.wordsTotal());
|
||||
stmt.setString(6, document.format());
|
||||
stmt.setInt(7, document.features());
|
||||
stmt.setLong(8, document.dataHash());
|
||||
stmt.setDouble(9, document.urlQuality());
|
||||
if (document.pubYear() == null) {
|
||||
stmt.setInt(10, 0);
|
||||
} else {
|
||||
stmt.setInt(10, document.pubYear());
|
||||
}
|
||||
|
||||
stmt.addBatch();
|
||||
|
||||
if (++i > 1000) {
|
||||
stmt.executeBatch();
|
||||
i = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (i != 0) stmt.executeBatch();
|
||||
}
|
||||
}
|
||||
|
||||
public void close() throws SQLException {
|
||||
connection.close();
|
||||
}
|
||||
}
|
@@ -0,0 +1,18 @@
|
||||
package nu.marginalia.linkdb.model;
|
||||
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
|
||||
public record DocdbUrlDetail(long urlId,
|
||||
EdgeUrl url,
|
||||
String title,
|
||||
String description,
|
||||
double urlQuality,
|
||||
String format,
|
||||
int features,
|
||||
Integer pubYear,
|
||||
long dataHash,
|
||||
int wordsTotal
|
||||
)
|
||||
|
||||
{
|
||||
}
|
19
code/common/linkdb/readme.md
Normal file
19
code/common/linkdb/readme.md
Normal file
@@ -0,0 +1,19 @@
|
||||
## Document Database
|
||||
|
||||
The document database contains information about links,
|
||||
such as their ID, their URL, their title, their description,
|
||||
and so forth.
|
||||
|
||||
The document database is a sqlite file. The reason this information
|
||||
is not in the MariaDB database is that this would make updates to
|
||||
this information take effect in production immediately, even before
|
||||
the information was searchable.
|
||||
|
||||
* [DocumentDbWriter](java/nu/marginalia/linkdb/docs/DocumentDbWriter.java)
|
||||
* [DocumentDbReader](java/nu/marginalia/linkdb/docs/DocumentDbReader.java)
|
||||
|
||||
**TODO**: This module should probably be renamed and moved into some other package.
|
||||
|
||||
## See Also
|
||||
|
||||
The database is constructed by the [loading-process](../../processes/loading-process), and consumed by the [index-service](../../services-core/index-service).
|
17
code/common/linkdb/resources/db/docdb-document.sql
Normal file
17
code/common/linkdb/resources/db/docdb-document.sql
Normal file
@@ -0,0 +1,17 @@
|
||||
CREATE TABLE DOCUMENT (
    -- Document identifier
    ID          INT8    PRIMARY KEY,

    URL         TEXT,

    STATE       INT,
    TITLE       TEXT    NOT NULL,
    DESCRIPTION TEXT    NOT NULL,

    WORDS_TOTAL INTEGER NOT NULL,
    FORMAT      TEXT    NOT NULL,
    -- Integer-encoded feature flags
    FEATURES    INTEGER NOT NULL,

    DATA_HASH   INTEGER NOT NULL,
    QUALITY     REAL    NOT NULL,
    -- Publication year
    PUB_YEAR    INTEGER NOT NULL
);
|
@@ -0,0 +1,44 @@
|
||||
package nu.marginalia.linkdb;
|
||||
|
||||
import gnu.trove.list.array.TLongArrayList;
|
||||
import nu.marginalia.linkdb.docs.DocumentDbReader;
|
||||
import nu.marginalia.linkdb.docs.DocumentDbWriter;
|
||||
import nu.marginalia.linkdb.model.DocdbUrlDetail;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.sql.SQLException;
|
||||
|
||||
public class DocumentDbWriterTest {
|
||||
@Test
|
||||
public void testCreate() throws IOException {
|
||||
Path tempPath = Files.createTempFile("docdb", ".db");
|
||||
try {
|
||||
var writer = new DocumentDbWriter(tempPath);
|
||||
writer.add(new DocdbUrlDetail(
|
||||
1,
|
||||
new nu.marginalia.model.EdgeUrl("http", new EdgeDomain("example.com"), null, "/", null),
|
||||
"Test",
|
||||
"This is a test",
|
||||
-4.,
|
||||
"XHTML",
|
||||
5,
|
||||
2020,
|
||||
0xF00BA3,
|
||||
444
|
||||
));
|
||||
writer.close();
|
||||
|
||||
var reader = new DocumentDbReader(tempPath);
|
||||
var deets = reader.getUrlDetails(new TLongArrayList(new long[]{1}));
|
||||
System.out.println(deets);
|
||||
} catch (SQLException e) {
|
||||
throw new RuntimeException(e);
|
||||
} finally {
|
||||
Files.deleteIfExists(tempPath);
|
||||
}
|
||||
}
|
||||
}
|
41
code/common/model/build.gradle
Normal file
41
code/common/model/build.gradle
Normal file
@@ -0,0 +1,41 @@
|
||||
plugins {
    id 'java'

    id 'jvm-test-suite'
}

// The JVM version is defined once on the root project
java {
    toolchain {
        languageVersion.set(JavaLanguageVersion.of(rootProject.ext.jvmVersion))
    }
}

apply from: "$rootProject.projectDir/srcsets.gradle"

dependencies {
    // Project-internal libraries
    implementation project(':code:libraries:braille-block-punch-cards')
    implementation project(':code:libraries:coded-sequence')

    implementation libs.bundles.slf4j

    implementation libs.guava
    // Guice is added without its transitive Guava dependency; Guava is declared directly above
    implementation dependencies.create(libs.guice.get()) {
        exclude group: 'com.google.guava'
    }
    implementation libs.bundles.gson

    implementation libs.notnull

    implementation libs.commons.lang3

    implementation libs.trove
    implementation libs.fastutil

    implementation libs.bundles.mariadb

    // Test-only dependencies
    testImplementation libs.bundles.slf4j.test
    testImplementation libs.bundles.junit
    testImplementation libs.mockito
}
|
||||
|
||||
|
195
code/common/model/java/nu/marginalia/model/EdgeDomain.java
Normal file
195
code/common/model/java/nu/marginalia/model/EdgeDomain.java
Normal file
@@ -0,0 +1,195 @@
|
||||
package nu.marginalia.model;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import java.io.Serializable;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
import java.util.function.Predicate;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class EdgeDomain implements Serializable {
|
||||
|
||||
@Nonnull
|
||||
public final String subDomain;
|
||||
@Nonnull
|
||||
public final String topDomain;
|
||||
|
||||
public EdgeDomain(@Nonnull String host) {
|
||||
Objects.requireNonNull(host, "domain name must not be null");
|
||||
|
||||
host = host.toLowerCase();
|
||||
|
||||
// Remove trailing dots, which are allowed in DNS but not in URLs
|
||||
// (though sometimes still show up in the wild)
|
||||
while (!host.isBlank() && host.endsWith(".")) {
|
||||
host = host.substring(0, host.length() - 1);
|
||||
}
|
||||
|
||||
var dot = host.lastIndexOf('.');
|
||||
|
||||
if (dot < 0 || looksLikeAnIp(host)) { // IPV6 >.>
|
||||
subDomain = "";
|
||||
topDomain = host;
|
||||
} else {
|
||||
int dot2 = host.substring(0, dot).lastIndexOf('.');
|
||||
if (dot2 < 0) {
|
||||
subDomain = "";
|
||||
topDomain = host;
|
||||
} else {
|
||||
if (looksLikeGovTld(host)) { // Capture .ac.jp, .co.uk
|
||||
int dot3 = host.substring(0, dot2).lastIndexOf('.');
|
||||
if (dot3 >= 0) {
|
||||
dot2 = dot3;
|
||||
subDomain = host.substring(0, dot2);
|
||||
topDomain = host.substring(dot2 + 1);
|
||||
} else {
|
||||
subDomain = "";
|
||||
topDomain = host;
|
||||
}
|
||||
} else {
|
||||
subDomain = host.substring(0, dot2);
|
||||
topDomain = host.substring(dot2 + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static final Predicate<String> govListTest = Pattern.compile(".*\\.(id|ac|co|org|gov|edu|com)\\.[a-z]{2}").asMatchPredicate();
|
||||
|
||||
public EdgeDomain(@Nonnull String subDomain, @Nonnull String topDomain) {
|
||||
this.subDomain = subDomain;
|
||||
this.topDomain = topDomain;
|
||||
}
|
||||
|
||||
public static String getTopDomain(String host) {
|
||||
return new EdgeDomain(host).topDomain;
|
||||
}
|
||||
|
||||
private boolean looksLikeGovTld(String host) {
|
||||
if (host.length() < 8)
|
||||
return false;
|
||||
int cnt = 0;
|
||||
for (int i = host.length() - 7; i < host.length(); i++) {
|
||||
if (host.charAt(i) == '.')
|
||||
cnt++;
|
||||
}
|
||||
return cnt >= 2 && govListTest.test(host);
|
||||
}
|
||||
|
||||
|
||||
private static final Predicate<String> ipPatternTest = Pattern.compile("[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}\\.[\\d]{1,3}").asMatchPredicate();
|
||||
|
||||
private boolean looksLikeAnIp(String host) {
|
||||
if (host.length() < 7)
|
||||
return false;
|
||||
|
||||
char firstChar = host.charAt(0);
|
||||
int lastChar = host.charAt(host.length() - 1);
|
||||
|
||||
return Character.isDigit(firstChar)
|
||||
&& Character.isDigit(lastChar)
|
||||
&& ipPatternTest.test(host);
|
||||
}
|
||||
|
||||
|
||||
public EdgeUrl toRootUrlHttp() {
|
||||
// Set default protocol to http, as most https websites redirect http->https, but few http websites redirect https->http
|
||||
return new EdgeUrl("http", this, null, "/", null);
|
||||
}
|
||||
|
||||
public EdgeUrl toRootUrlHttps() {
|
||||
return new EdgeUrl("https", this, null, "/", null);
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return getAddress();
|
||||
}
|
||||
|
||||
public String getAddress() {
|
||||
if (!subDomain.isEmpty()) {
|
||||
return subDomain + "." + topDomain;
|
||||
}
|
||||
return topDomain;
|
||||
}
|
||||
|
||||
public String getDomainKey() {
|
||||
int cutPoint = topDomain.indexOf('.');
|
||||
if (cutPoint < 0) {
|
||||
return topDomain;
|
||||
}
|
||||
return topDomain.substring(0, cutPoint).toLowerCase();
|
||||
}
|
||||
|
||||
/** If possible, try to provide an alias domain,
|
||||
* i.e. a domain name that is very likely to link to this one
|
||||
* */
|
||||
public Optional<EdgeDomain> aliasDomain() {
|
||||
if (subDomain.equals("www")) {
|
||||
return Optional.of(new EdgeDomain("", topDomain));
|
||||
} else if (subDomain.isBlank()){
|
||||
return Optional.of(new EdgeDomain("www", topDomain));
|
||||
}
|
||||
else return Optional.empty();
|
||||
}
|
||||
|
||||
|
||||
public boolean hasSameTopDomain(EdgeDomain other) {
|
||||
if (other == null) return false;
|
||||
|
||||
return topDomain.equalsIgnoreCase(other.topDomain);
|
||||
}
|
||||
|
||||
public String getTld() {
|
||||
int dot = -1;
|
||||
int length = topDomain.length();
|
||||
|
||||
if (ipPatternTest.test(topDomain)) {
|
||||
return "IP";
|
||||
}
|
||||
|
||||
if (govListTest.test(topDomain)) {
|
||||
dot = topDomain.indexOf('.', Math.max(0, length - ".edu.uk".length()));
|
||||
} else {
|
||||
dot = topDomain.lastIndexOf('.');
|
||||
}
|
||||
|
||||
|
||||
if (dot < 0 || dot == topDomain.length() - 1) {
|
||||
return "-";
|
||||
} else {
|
||||
return topDomain.substring(dot + 1);
|
||||
}
|
||||
}
|
||||
|
||||
public boolean equals(final Object o) {
|
||||
if (o == this) return true;
|
||||
if (!(o instanceof EdgeDomain other)) return false;
|
||||
final String this$subDomain = this.getSubDomain();
|
||||
final String other$subDomain = other.getSubDomain();
|
||||
if (!Objects.equals(this$subDomain, other$subDomain)) return false;
|
||||
final String this$domain = this.getTopDomain();
|
||||
final String other$domain = other.getTopDomain();
|
||||
if (!Objects.equals(this$domain, other$domain)) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
public int hashCode() {
|
||||
final int PRIME = 59;
|
||||
int result = 1;
|
||||
final Object $subDomain = this.getSubDomain().toLowerCase();
|
||||
result = result * PRIME + $subDomain.hashCode();
|
||||
final Object $domain = this.getTopDomain().toLowerCase();
|
||||
result = result * PRIME + $domain.hashCode();
|
||||
return result;
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
public String getSubDomain() {
|
||||
return this.subDomain;
|
||||
}
|
||||
|
||||
@Nonnull
|
||||
public String getTopDomain() {
|
||||
return this.topDomain;
|
||||
}
|
||||
}
|
457
code/common/model/java/nu/marginalia/model/EdgeUrl.java
Normal file
457
code/common/model/java/nu/marginalia/model/EdgeUrl.java
Normal file
@@ -0,0 +1,457 @@
|
||||
package nu.marginalia.model;
|
||||
|
||||
import nu.marginalia.util.QueryParams;
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.io.Serializable;
|
||||
import java.net.*;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Objects;
|
||||
import java.util.Optional;
|
||||
|
||||
public class EdgeUrl implements Serializable {
|
||||
public final String proto;
|
||||
public final EdgeDomain domain;
|
||||
public final Integer port;
|
||||
public final String path;
|
||||
public final String param;
|
||||
|
||||
public EdgeUrl(String proto, EdgeDomain domain, Integer port, String path, String param) {
|
||||
this.proto = proto;
|
||||
this.domain = domain;
|
||||
this.port = port(port, proto);
|
||||
this.path = path;
|
||||
this.param = param;
|
||||
}
|
||||
|
||||
public EdgeUrl(String url) throws URISyntaxException {
|
||||
this(parseURI(url));
|
||||
}
|
||||
|
||||
private static URI parseURI(String url) throws URISyntaxException {
|
||||
try {
|
||||
return EdgeUriFactory.parseURILenient(url);
|
||||
} catch (URISyntaxException ex) {
|
||||
throw new URISyntaxException("Failed to parse URI '" + url + "'", ex.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
public static Optional<EdgeUrl> parse(@Nullable String url) {
|
||||
try {
|
||||
if (null == url) {
|
||||
return Optional.empty();
|
||||
}
|
||||
|
||||
return Optional.of(new EdgeUrl(url));
|
||||
} catch (URISyntaxException e) {
|
||||
return Optional.empty();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public EdgeUrl(URI URI) {
|
||||
try {
|
||||
String host = URI.getHost();
|
||||
|
||||
if (host == null) { // deal with a rare serialization error
|
||||
host = "parse-error.invalid.example.com";
|
||||
}
|
||||
|
||||
this.domain = new EdgeDomain(host);
|
||||
this.path = URI.getPath().isEmpty() ? "/" : URI.getPath();
|
||||
this.proto = URI.getScheme().toLowerCase();
|
||||
this.port = port(URI.getPort(), proto);
|
||||
this.param = QueryParams.queryParamsSanitizer(this.path, URI.getQuery());
|
||||
} catch (Exception ex) {
|
||||
System.err.println("Failed to parse " + URI);
|
||||
throw ex;
|
||||
}
|
||||
}
|
||||
|
||||
public EdgeUrl(URL URL) {
|
||||
try {
|
||||
String host = URL.getHost();
|
||||
|
||||
if (host == null) { // deal with a rare serialization error
|
||||
host = "parse-error.invalid.example.com";
|
||||
}
|
||||
|
||||
this.domain = new EdgeDomain(host);
|
||||
this.path = URL.getPath().isEmpty() ? "/" : URL.getPath();
|
||||
this.proto = URL.getProtocol().toLowerCase();
|
||||
this.port = port(URL.getPort(), proto);
|
||||
this.param = QueryParams.queryParamsSanitizer(this.path, URL.getQuery());
|
||||
} catch (Exception ex) {
|
||||
System.err.println("Failed to parse " + URL);
|
||||
throw ex;
|
||||
}
|
||||
}
|
||||
|
||||
private static Integer port(Integer port, String protocol) {
|
||||
if (null == port || port < 1) {
|
||||
return null;
|
||||
}
|
||||
if (protocol.equals("http") && port == 80) {
|
||||
return null;
|
||||
} else if (protocol.equals("https") && port == 443) {
|
||||
return null;
|
||||
}
|
||||
return port;
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder(256);
|
||||
|
||||
sb.append(proto);
|
||||
sb.append("://");
|
||||
sb.append(domain);
|
||||
|
||||
if (port != null) {
|
||||
sb.append(':');
|
||||
sb.append(port);
|
||||
}
|
||||
|
||||
EdgeUriFactory.urlencodePath(sb, path);
|
||||
|
||||
if (param != null) {
|
||||
EdgeUriFactory.urlencodeQuery(sb, param);
|
||||
}
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
|
||||
public String toDisplayString() {
|
||||
StringBuilder sb = new StringBuilder(256);
|
||||
|
||||
sb.append(proto);
|
||||
sb.append("://");
|
||||
sb.append(domain);
|
||||
|
||||
if (port != null) {
|
||||
sb.append(':');
|
||||
sb.append(port);
|
||||
}
|
||||
|
||||
sb.append(path);
|
||||
|
||||
if (param != null) {
|
||||
sb.append('?').append(param);
|
||||
}
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public String dir() {
|
||||
return path.replaceAll("/[^/]+$", "/");
|
||||
}
|
||||
|
||||
public String fileName() {
|
||||
return path.replaceAll(".*/", "");
|
||||
}
|
||||
|
||||
public int depth() {
|
||||
return (int) path.chars().filter(c -> c == '/').count();
|
||||
}
|
||||
|
||||
public EdgeUrl withPathAndParam(String path, String param) {
|
||||
return new EdgeUrl(proto, domain, port, path, param);
|
||||
}
|
||||
|
||||
public boolean equals(Object other) {
|
||||
if (other == null) return false;
|
||||
if (other == this) return true;
|
||||
if (other instanceof EdgeUrl e) {
|
||||
return Objects.equals(e.domain, domain)
|
||||
&& Objects.equals(e.path, path)
|
||||
&& Objects.equals(e.param, param);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean equalsExactly(Object other) {
|
||||
if (other == null) return false;
|
||||
if (other == this) return true;
|
||||
if (other instanceof EdgeUrl e) {
|
||||
return Objects.equals(e.proto, proto)
|
||||
&& Objects.equals(e.domain, domain)
|
||||
&& Objects.equals(e.port, port)
|
||||
&& Objects.equals(e.path, path)
|
||||
&& Objects.equals(e.param, param);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
public int hashCode() {
|
||||
return Objects.hash(domain, path, param);
|
||||
}
|
||||
|
||||
public URL asURL() throws MalformedURLException {
|
||||
try {
|
||||
return asURI().toURL();
|
||||
} catch (URISyntaxException e) {
|
||||
throw new MalformedURLException(e.getMessage());
|
||||
}
|
||||
}
|
||||
|
||||
public URI asURI() throws URISyntaxException {
|
||||
if (port != null) {
|
||||
return new URI(this.proto, null, this.domain.toString(), this.port, this.path, this.param, null);
|
||||
}
|
||||
|
||||
return new URI(this.proto, this.domain.toString(), this.path, this.param, null);
|
||||
}
|
||||
|
||||
public EdgeDomain getDomain() {
|
||||
return this.domain;
|
||||
}
|
||||
|
||||
public String getProto() {
|
||||
return this.proto;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class EdgeUriFactory {
|
||||
public static URI parseURILenient(String url) throws URISyntaxException {
|
||||
|
||||
if (shouldOmitUrlencodeRepair(url)) {
|
||||
try {
|
||||
return new URI(url);
|
||||
}
|
||||
catch (URISyntaxException ex) {
|
||||
// ignore and run the lenient parser
|
||||
}
|
||||
}
|
||||
|
||||
var s = new StringBuilder(url.length()+8);
|
||||
|
||||
int pathIdx = findPathIdx(url);
|
||||
if (pathIdx < 0) { // url looks like http://marginalia.nu
|
||||
return new URI(url + "/");
|
||||
}
|
||||
s.append(url, 0, pathIdx);
|
||||
|
||||
// We don't want the fragment, and multiple fragments breaks the Java URIParser for some reason
|
||||
int end = url.indexOf("#");
|
||||
if (end < 0) end = url.length();
|
||||
|
||||
int queryIdx = url.indexOf('?');
|
||||
if (queryIdx < 0) queryIdx = end;
|
||||
|
||||
urlencodePath(s, url.substring(pathIdx, queryIdx));
|
||||
if (queryIdx < end) {
|
||||
urlencodeQuery(s, url.substring(queryIdx + 1, end));
|
||||
}
|
||||
return new URI(s.toString());
|
||||
}
|
||||
|
||||
/** Break apart the path element of an URI into its components, and then
|
||||
* urlencode any component that needs it, and recombine it into a single
|
||||
* path element again.
|
||||
*/
|
||||
public static void urlencodePath(StringBuilder sb, String path) {
|
||||
if (path == null || path.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
String[] pathParts = StringUtils.split(path, '/');
|
||||
if (pathParts.length == 0) {
|
||||
sb.append('/');
|
||||
return;
|
||||
}
|
||||
|
||||
boolean shouldUrlEncode = false;
|
||||
for (String pathPart : pathParts) {
|
||||
if (pathPart.isEmpty()) continue;
|
||||
|
||||
if (needsUrlEncode(pathPart)) {
|
||||
shouldUrlEncode = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (String pathPart : pathParts) {
|
||||
if (pathPart.isEmpty()) continue;
|
||||
|
||||
if (shouldUrlEncode) {
|
||||
sb.append('/');
|
||||
sb.append(URLEncoder.encode(pathPart, StandardCharsets.UTF_8).replace("+", "%20"));
|
||||
} else {
|
||||
sb.append('/');
|
||||
sb.append(pathPart);
|
||||
}
|
||||
}
|
||||
|
||||
if (path.endsWith("/")) {
|
||||
sb.append('/');
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** Break apart the query element of a URI into its components, and then
|
||||
* urlencode any component that needs it, and recombine it into a single
|
||||
* query element again.
|
||||
*/
|
||||
public static void urlencodeQuery(StringBuilder sb, String param) {
|
||||
if (param == null || param.isEmpty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
String[] queryParts = StringUtils.split(param, '&');
|
||||
|
||||
boolean shouldUrlEncode = false;
|
||||
for (String queryPart : queryParts) {
|
||||
if (queryPart.isEmpty()) continue;
|
||||
|
||||
if (needsUrlEncode(queryPart)) {
|
||||
shouldUrlEncode = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
boolean first = true;
|
||||
for (String queryPart : queryParts) {
|
||||
if (queryPart.isEmpty()) continue;
|
||||
|
||||
if (first) {
|
||||
sb.append('?');
|
||||
first = false;
|
||||
} else {
|
||||
sb.append('&');
|
||||
}
|
||||
|
||||
if (shouldUrlEncode) {
|
||||
int idx = queryPart.indexOf('=');
|
||||
if (idx < 0) {
|
||||
sb.append(URLEncoder.encode(queryPart, StandardCharsets.UTF_8));
|
||||
} else {
|
||||
sb.append(URLEncoder.encode(queryPart.substring(0, idx), StandardCharsets.UTF_8));
|
||||
sb.append('=');
|
||||
sb.append(URLEncoder.encode(queryPart.substring(idx + 1), StandardCharsets.UTF_8));
|
||||
}
|
||||
} else {
|
||||
sb.append(queryPart);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Test if the url element needs URL encoding.
|
||||
* <p></p>
|
||||
* Note we may have been given an already encoded path element,
|
||||
* so we include % and + in the list of good characters
|
||||
*/
|
||||
static boolean needsUrlEncode(String urlElement) {
|
||||
for (int i = 0; i < urlElement.length(); i++) {
|
||||
char c = urlElement.charAt(i);
|
||||
|
||||
if (isUrlSafe(c)) continue;
|
||||
if ("+".indexOf(c) >= 0) continue;
|
||||
if (c == '%' && i + 2 < urlElement.length()) {
|
||||
char c1 = urlElement.charAt(i + 1);
|
||||
char c2 = urlElement.charAt(i + 2);
|
||||
if (isHexDigit(c1) && isHexDigit(c2)) {
|
||||
i += 2;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
static boolean isUrlSafe(int c) {
|
||||
if (c >= 'a' && c <= 'z') return true;
|
||||
if (c >= 'A' && c <= 'Z') return true;
|
||||
if (c >= '0' && c <= '9') return true;
|
||||
if (c == '-' || c == '_' || c == '.' || c == '~') return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/** Test if the URL is a valid URL that does not need to be
|
||||
* urlencoded.
|
||||
* <p></p>
|
||||
* This is a very simple heuristic test that does not guarantee
|
||||
* that the URL is valid, but it will identify cases where we
|
||||
* are fairly certain that the URL does not need encoding,
|
||||
* so we can skip a bunch of allocations and string operations
|
||||
* that would otherwise be needed to fix the URL.
|
||||
*/
|
||||
static boolean shouldOmitUrlencodeRepair(String url) {
|
||||
int idx = 0;
|
||||
final int len = url.length();
|
||||
|
||||
// Validate the scheme
|
||||
while (idx < len - 2) {
|
||||
char c = url.charAt(idx++);
|
||||
if (c == ':') break;
|
||||
if (!isAsciiAlphabetic(c)) return false;
|
||||
}
|
||||
if (url.charAt(idx++) != '/') return false;
|
||||
if (url.charAt(idx++) != '/') return false;
|
||||
|
||||
// Validate the authority
|
||||
while (idx < len) {
|
||||
char c = url.charAt(idx++);
|
||||
if (c == '/') break;
|
||||
if (c == ':') continue;
|
||||
if (c == '@') continue;
|
||||
if (!isUrlSafe(c)) return false;
|
||||
}
|
||||
|
||||
// Validate the path
|
||||
if (idx >= len) return true;
|
||||
|
||||
while (idx < len) {
|
||||
char c = url.charAt(idx++);
|
||||
if (c == '?') break;
|
||||
if (c == '/') continue;
|
||||
if (c == '#') return true;
|
||||
if (!isUrlSafe(c)) return false;
|
||||
}
|
||||
|
||||
if (idx >= len) return true;
|
||||
|
||||
// Validate the query
|
||||
while (idx < len) {
|
||||
char c = url.charAt(idx++);
|
||||
if (c == '&') continue;
|
||||
if (c == '=') continue;
|
||||
if (c == '#') return true;
|
||||
if (!isUrlSafe(c)) return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
private static boolean isAsciiAlphabetic(int c) {
|
||||
return (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
|
||||
}
|
||||
|
||||
private static boolean isHexDigit(int c) {
|
||||
return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
|
||||
}
|
||||
|
||||
/** Find the index of the path element in a URL.
|
||||
* <p></p>
|
||||
* The path element starts after the scheme and authority part of the URL,
|
||||
* which is everything up to and including the first slash after the colon.
|
||||
*/
|
||||
private static int findPathIdx(String url) throws URISyntaxException {
|
||||
int colonIdx = url.indexOf(':');
|
||||
if (colonIdx < 0 || colonIdx + 3 >= url.length()) {
|
||||
throw new URISyntaxException(url, "Lacking scheme");
|
||||
}
|
||||
return url.indexOf('/', colonIdx + 3);
|
||||
}
|
||||
|
||||
|
||||
}
|
@@ -0,0 +1,18 @@
|
||||
package nu.marginalia.model.crawl;
|
||||
|
||||
/** Indexing state of a domain, each with a human-readable description. */
public enum DomainIndexingState {
    ACTIVE("Active"),
    EXHAUSTED("Fully Crawled"),
    SPECIAL("Content is side-loaded"),
    SOCIAL_MEDIA("Social media-like website"),
    BLOCKED("Blocked"),
    REDIR("Redirected to another domain"),
    ERROR("Error during crawling"),
    UNKNOWN("Unknown");

    /** Human-readable description of the state; final, since enum constants are shared globally */
    public final String desc;

    DomainIndexingState(String desc) {
        this.desc = desc;
    }
}
|
@@ -0,0 +1,96 @@
|
||||
package nu.marginalia.model.crawl;
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
/**
 * Features detected in a document, each with an associated search keyword.
 * <p>
 * The declaration order matters: the ordinal of a constant is its bit position
 * in the integer encoding, so constants must not be reordered.
 */
public enum HtmlFeature {
    // Note, the first 32 of these features are bit encoded in the database
    // so be sure to keep anything that's potentially important toward the top
    // of the list

    MEDIA( "special:media"),
    JS("special:scripts"),
    AFFILIATE_LINK( "special:affiliate"),
    TRACKING("special:tracking"),
    TRACKING_ADTECH("special:ads"), // We'll call this ads for now

    KEBAB_CASE_URL("special:kcurl"), // https://www.example.com/urls-that-look-like-this/
    LONG_URL("special:longurl"),

    CLOUDFLARE_FEATURE("special:cloudflare"),
    CDN_FEATURE("special:cdn"),

    VIEWPORT("special:viewport"),

    COOKIES("special:cookies"),
    CATEGORY_FOOD("category:food"),
    ADVERTISEMENT("special:ads"),
    CATEGORY_CRAFTS("category:crafts"),

    GA_SPAM("special:gaspam"),

    /** For fingerprinting and ranking */
    OPENGRAPH("special:opengraph"),
    OPENGRAPH_IMAGE("special:opengraph:image"),
    TWITTERCARD("special:twittercard"),
    TWITTERCARD_IMAGE("special:twittercard:image"),
    FONTAWSESOME("special:fontawesome"),
    GOOGLEFONTS("special:googlefonts"),
    DNS_PREFETCH("special:dnsprefetch"),
    PRELOAD("special:preload"),
    PRECONNECT("special:preconnect"),
    PINGBACK("special:pingback"),
    FEED("special:feed"),
    WEBMENTION("special:webmention"),
    INDIEAUTH("special:indieauth"),
    ME_TAG("special:metag"),
    NEXT_TAG("special:nexttag"),
    AMPHTML("special:amphtml"),
    JSON_LD("special:jsonld"),
    ORIGIN_TRIAL("special:origintrial"),
    PROFILE_GMPG("special:profile-gpmg"),
    QUANTCAST("special:quantcast"),
    COOKIELAW("special:cookielaw"),
    DIDOMI("special:didomi"),
    PARDOT("special:pardot"),
    ONESIGNAL("special:onesignal"),
    DATE_TAG("special:date_tag"),
    NOSCRIPT_TAG("special:noscript_tag"),

    ROBOTS_INDEX("robots:index"),
    ROBOTS_FOLLOW("robots:follow"),
    ROBOTS_NOODP("robots:noodp"),
    ROBOTS_NOYDIR("robots:noydir"),
    DOFOLLOW_LINK("special:dofollow"),
    APPLE_TOUCH_ICON("special:appleicon"),

    S3_FEATURE("special:s3"),

    UNKNOWN("special:uncategorized");

    /** Number of features that fit in the integer encoding, one bit each. */
    private static final int ENCODABLE_FEATURES = Integer.SIZE;

    private final String keyword;

    HtmlFeature(String keyword) {
        this.keyword = keyword;
    }

    public String getKeyword() {
        return keyword;
    }

    /**
     * Encodes the features as a bit mask with bit {@code ordinal()} set for each feature.
     * Features with an ordinal of 32 or above have no bit and are left out.
     */
    public static int encode(Collection<HtmlFeature> featuresAll) {
        int ret = 0;
        for (var feature : featuresAll) {
            ret |= feature.getFeatureBit();
        }
        return ret;
    }

    /**
     * Tests whether the bit of the feature is set in an encoded value.
     * Always false for features that have no bit in the encoding.
     */
    public static boolean hasFeature(int value, HtmlFeature feature) {
        return (value & feature.getFeatureBit()) != 0;
    }

    /**
     * Returns the bit mask of this feature, or 0 if the feature has no bit in the encoding.
     */
    public int getFeatureBit() {
        // An int shift only uses the low 5 bits of the shift distance, so without this guard
        // ordinal 32 would alias onto bit 0, ordinal 33 onto bit 1, and so on
        if (ordinal() >= ENCODABLE_FEATURES) {
            return 0;
        }
        return (1 << ordinal());
    }
}
|
@@ -1,4 +1,4 @@
|
||||
package nu.marginalia.wmsa.edge.converting.processor.logic.pubdate;
|
||||
package nu.marginalia.model.crawl;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
@@ -57,5 +57,8 @@ public record PubDate(String dateIso8601, int year) {
|
||||
public static int fromYearByte(int yearByte) {
|
||||
return yearByte + ENCODING_OFFSET;
|
||||
}
|
||||
public static int toYearByte(int year) {
|
||||
return Math.max(0, year - ENCODING_OFFSET);
|
||||
}
|
||||
|
||||
}
|
@@ -0,0 +1,10 @@
|
||||
package nu.marginalia.model.crawl;
|
||||
|
||||
/**
 * Indexing state of a URL.
 * <p>
 * This should correspond to EC_URL.STATE
 */
public enum UrlIndexingState {
    OK, REDIRECT, DEAD, DISQUALIFIED
}
|
@@ -0,0 +1,27 @@
|
||||
package nu.marginalia.model.gson;
|
||||
|
||||
import com.google.gson.*;
|
||||
import marcono1234.gson.recordadapter.RecordTypeAdapterFactory;
|
||||
import nu.marginalia.model.EdgeDomain;
|
||||
import nu.marginalia.model.EdgeUrl;
|
||||
|
||||
import java.net.URISyntaxException;
|
||||
|
||||
public class GsonFactory {
|
||||
public static Gson get() {
|
||||
return new GsonBuilder()
|
||||
.registerTypeAdapterFactory(RecordTypeAdapterFactory.builder().allowMissingComponentValues().create())
|
||||
.registerTypeAdapter(EdgeUrl.class, (JsonSerializer<EdgeUrl>) (src, typeOfSrc, context) -> new JsonPrimitive(src.toString()))
|
||||
.registerTypeAdapter(EdgeDomain.class, (JsonSerializer<EdgeDomain>) (src, typeOfSrc, context) -> new JsonPrimitive(src.toString()))
|
||||
.registerTypeAdapter(EdgeUrl.class, (JsonDeserializer<EdgeUrl>) (json, typeOfT, context) -> {
|
||||
try {
|
||||
return new EdgeUrl(json.getAsString());
|
||||
} catch (URISyntaxException e) {
|
||||
throw new JsonParseException("URL Parse Exception", e);
|
||||
}
|
||||
})
|
||||
.registerTypeAdapter(EdgeDomain.class, (JsonDeserializer<EdgeDomain>) (json, typeOfT, context) -> new EdgeDomain(json.getAsString()))
|
||||
.serializeSpecialFloatingPointValues()
|
||||
.create();
|
||||
}
|
||||
}
|
@@ -0,0 +1,22 @@
|
||||
package nu.marginalia.model.html;
|
||||
|
||||
// This class really doesn't belong anywhere, but will squat here for now
|
||||
/** HTML standard of a document, each carrying an offset and a scale for tuning the quality score. */
public enum HtmlStandard {
    PLAIN(0.0, 1.0),
    UNKNOWN(0.0, 1.0),
    HTML123(0.0, 1.0),
    HTML4(-0.1, 1.05),
    XHTML(-0.1, 1.05),
    HTML5(0.5, 1.1);

    /** Offset used when tuning the quality score */
    public final double offset;

    /** Scale used when tuning the quality score */
    public final double scale;

    HtmlStandard(double offset, double scale) {
        this.offset = offset;
        this.scale = scale;
    }
}
|
@@ -0,0 +1,93 @@
|
||||
package nu.marginalia.model.id;
|
||||
|
||||
/** URL id encoding scheme, including an optional ranking part that's used in the indices and washed away
 * outside. The ranking part is put in the highest bits so that when we sort the documents by id, they're
 * actually sorted by rank. Next is the domain id part, which keeps documents from the same domain clustered.
 * Finally is the document ordinal part, which is a non-unique sequence number for within the current set of
 * documents loaded. The same ID may be re-used over time as a new index is loaded.
 * <p></p>
 * <table>
 *     <tr><th>Part</th><th>Bits</th><th>Cardinality</th></tr>
 *     <tr><td>rank</td><td>6 bits</td><td>64</td></tr>
 *     <tr><td>domain</td><td>31 bits</td><td>2 billion</td></tr>
 *     <tr><td>document</td><td>26 bits</td><td>67 million</td></tr>
 * </table>
 * <p></p>
 * Most significant bit is unused for now because I'm not routing Long.compareUnsigned() all over the codebase.
 * <i>If</i> we end up needing more domains, we'll cross that bridge when we come to it.
 *
 * <h2>Coding Scheme</h2>
 * Bit positions are counted from the most significant bit:
 * <code><pre>
 * [   | rank | domain | url ]
 * 0   1      7        38    64
 * </pre></code>
 * Counted from the least significant bit instead, the document ordinal occupies bits 0-25,
 * the domain id bits 26-56 and the rank bits 57-62.
 */
public class UrlIdCodec {
    /** Covers the rank field and the unused most significant bit (bits 57-63). */
    private static final long RANK_MASK = 0xFE00_0000_0000_0000L;
    /** Covers the 26 bit document ordinal. */
    private static final int DOCORD_MASK = 0x03FF_FFFF;
    /** Covers the 31 bit domain id, before it is shifted into place. */
    private static final int DOMAIN_MASK = 0x7FFF_FFFF;
    /** Covers the 6 bit rank, before it is shifted into place. */
    private static final int RANK_VALUE_MASK = 0x3F;

    private static final int DOMAIN_SHIFT = 26;
    private static final int RANK_SHIFT = 57;

    /** Encode a URL id without a ranking element.
     * <p>
     * Inputs wider than their field are silently truncated to the field width.
     *
     * @param domainId        domain id, the low 31 bits are used
     * @param documentOrdinal document ordinal, the low 26 bits are used
     * @return the combined id with a zero rank part
     */
    public static long encodeId(int domainId, int documentOrdinal) {
        final long domainPart = (long) (domainId & DOMAIN_MASK) << DOMAIN_SHIFT;
        final long ordinalPart = documentOrdinal & DOCORD_MASK;

        return domainPart | ordinalPart;
    }

    /** Encode a URL id with a ranking element.
     * <p>
     * With assertions enabled, out-of-range arguments fail fast; otherwise they are
     * truncated to their field width.
     *
     * @param rank            rank in [0, 63]
     * @param domainId        domain id in [0, 2^31-1]
     * @param documentOrdinal document ordinal in [0, 2^26-1]
     * @return the combined id
     */
    public static long encodeId(int rank, int domainId, int documentOrdinal) {
        assert (rank & 0x3F) == rank : "Rank must be in [0, 63], was " + rank;
        assert (domainId & 0x7FFF_FFFF) == domainId : "Domain id must be in [0, 2^31-1], was " + domainId;
        assert (documentOrdinal & 0x03FF_FFFF) == documentOrdinal : "Document ordinal must be in [0, 2^26-1], was " + documentOrdinal;

        final long rankPart = (long) (rank & RANK_VALUE_MASK) << RANK_SHIFT;
        final long domainPart = (long) (domainId & DOMAIN_MASK) << DOMAIN_SHIFT;
        final long ordinalPart = documentOrdinal & DOCORD_MASK;

        return rankPart | domainPart | ordinalPart;
    }

    /** Add a ranking element to an existing combined URL id, replacing any rank already present.
     *
     * @param rank  [0,1] the importance of the domain, low is good; it is scaled by 64,
     *              truncated to an integer and clamped to [0, 63]
     * @param urlId the combined id to re-rank
     * @return the id with its rank part replaced
     */
    public static long addRank(float rank, long urlId) {
        final int scaled = (int) (rank * 64);
        final long quantized = Math.min(63, Math.max(0, scaled));

        return removeRank(urlId) | (quantized << RANK_SHIFT);
    }

    /** Extract the domain component from this URL id */
    public static int getDomainId(long combinedId) {
        final long shifted = combinedId >>> DOMAIN_SHIFT;
        return (int) (shifted & 0x7FFF_FFFFL);
    }

    /** Extract the document ordinal component from this URL id */
    public static int getDocumentOrdinal(long combinedId) {
        return (int) (combinedId & DOCORD_MASK);
    }

    /** Extract the rank component from this URL id */
    public static int getRank(long combinedId) {
        final int topBits = (int) (combinedId >>> RANK_SHIFT);
        // Drops the unused most significant bit, leaving the 6 bit rank
        return topBits & RANK_VALUE_MASK;
    }

    /** Mask out the ranking element from this URL id */
    public static long removeRank(long combinedId) {
        return combinedId & ~RANK_MASK;
    }
}
|
@@ -0,0 +1,6 @@
|
||||
package nu.marginalia.model.idx;
|
||||
|
||||
import nu.marginalia.sequence.VarintCodedSequence;
|
||||
|
||||
public record CodedWordSpan(byte code, VarintCodedSequence spans) {
|
||||
}
|
@@ -0,0 +1,35 @@
|
||||
package nu.marginalia.model.idx;
|
||||
|
||||
import java.util.EnumSet;
|
||||
|
||||
/**
 * Document-level flags. Each constant maps to one bit, given by its declaration order,
 * so the eight constants together fill a single byte. Do not reorder them.
 */
public enum DocumentFlags {
    Javascript, PlainText, GeneratorDocs, GeneratorForum,
    GeneratorWiki, Sideloaded, Unused7, Unused8;

    /** @return the single-bit mask for this flag, {@code 1 << ordinal()} */
    public int asBit() {
        return 1 << ordinal();
    }

    /** @return true if this flag's bit is set in {@code value} */
    public boolean isPresent(long value) {
        // The mask is a small positive number, so testing for non-zero equals testing for positive
        return (value & asBit()) != 0;
    }

    /**
     * Expands an encoded value into the set of flags it contains.
     * Only the lowest eight bits of {@code encodedValue} are considered.
     */
    public static EnumSet<DocumentFlags> decode(long encodedValue) {
        final long lowByte = encodedValue & 0xff;
        final EnumSet<DocumentFlags> present = EnumSet.noneOf(DocumentFlags.class);

        for (DocumentFlags candidate : values()) {
            if ((lowByte & candidate.asBit()) == 0) {
                continue;
            }
            present.add(candidate);
        }

        return present;
    }
}
|
@@ -0,0 +1,168 @@
|
||||
package nu.marginalia.model.idx;
|
||||
|
||||
import nu.marginalia.model.crawl.PubDate;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.util.EnumSet;
|
||||
import java.util.Set;
|
||||
|
||||
import static java.lang.Math.max;
|
||||
import static java.lang.Math.min;
|
||||
|
||||
/** Document level metadata designed to fit in a single 64 bit long.
|
||||
*
|
||||
* @param avgSentLength average sentence length
|
||||
* @param rank domain ranking
|
||||
* @param encDomainSize encoded number of documents in the domain
|
||||
* @param topology a measure of how important the document is
|
||||
* @param year encoded publishing year
|
||||
* @param sets bit mask for search sets
|
||||
* @param quality quality of the document (0-15); 0 is best, 15 is worst
|
||||
* @param flags flags (see {@link DocumentFlags})
|
||||
*/
|
||||
public record DocumentMetadata(int avgSentLength,
|
||||
int rank,
|
||||
int encDomainSize,
|
||||
int topology,
|
||||
int year,
|
||||
int sets,
|
||||
int quality,
|
||||
byte flags)
|
||||
implements Serializable
|
||||
{
|
||||
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder(getClass().getSimpleName());
|
||||
sb.append('[')
|
||||
.append("avgSentL=").append(avgSentLength).append(", ")
|
||||
.append("rank=").append(rank).append(", ")
|
||||
.append("domainSize=").append(ENC_DOMAIN_SIZE_MULTIPLIER * encDomainSize).append(", ")
|
||||
.append("topology=").append(topology).append(", ")
|
||||
.append("year=").append(PubDate.fromYearByte(year)).append(", ")
|
||||
.append("sets=").append(sets).append(", ")
|
||||
.append("quality=").append(quality).append(", ")
|
||||
.append("flags=").append(flagSet()).append("]");
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
public static final long ASL_MASK = 0x03L;
|
||||
public static final int ASL_SHIFT = 56;
|
||||
|
||||
public static final long RANK_MASK = 0xFFL;
|
||||
public static final int RANK_SHIFT = 48;
|
||||
|
||||
public static final long ENC_DOMAIN_SIZE_MASK = 0xFFL;
|
||||
public static final int ENC_DOMAIN_SIZE_SHIFT = 40;
|
||||
public static final int ENC_DOMAIN_SIZE_MULTIPLIER = 5;
|
||||
|
||||
public static final long TOPOLOGY_MASK = 0xFFL;
|
||||
|
||||
public static final int TOPOLOGY_SHIFT = 32;
|
||||
|
||||
public static final long YEAR_MASK = 0xFFL;
|
||||
public static final int YEAR_SHIFT = 24;
|
||||
|
||||
public static final long SETS_MASK = 0xFL;
|
||||
public static final int SETS_SHIFT = 16;
|
||||
|
||||
public static final long QUALITY_MASK = 0xFL;
|
||||
public static final int QUALITY_SHIFT = 8;
|
||||
|
||||
public static long defaultValue() {
|
||||
return 0L;
|
||||
}
|
||||
public DocumentMetadata() {
|
||||
this(defaultValue());
|
||||
}
|
||||
|
||||
public DocumentMetadata(int avgSentLength, int year, int quality, EnumSet<DocumentFlags> flags) {
|
||||
this(avgSentLength, 0, 0, 0, year, 0, quality, encodeFlags(flags));
|
||||
}
|
||||
|
||||
public DocumentMetadata withSizeAndTopology(int size, int topology) {
|
||||
final int encSize = (int) Math.min(ENC_DOMAIN_SIZE_MASK, Math.max(1, size / ENC_DOMAIN_SIZE_MULTIPLIER));
|
||||
|
||||
return new DocumentMetadata(avgSentLength, rank, encSize, topology, year, sets, quality, flags);
|
||||
}
|
||||
|
||||
private static byte encodeFlags(Set<DocumentFlags> flags) {
|
||||
byte ret = 0;
|
||||
for (var flag : flags) { ret |= flag.asBit(); }
|
||||
return ret;
|
||||
}
|
||||
|
||||
public boolean hasFlag(DocumentFlags flag) {
|
||||
return (flags & flag.asBit()) != 0;
|
||||
}
|
||||
|
||||
public DocumentMetadata(long value) {
|
||||
this(
|
||||
(int) ((value >>> ASL_SHIFT) & ASL_MASK),
|
||||
(int) ((value >>> RANK_SHIFT) & RANK_MASK),
|
||||
(int) ((value >>> ENC_DOMAIN_SIZE_SHIFT) & ENC_DOMAIN_SIZE_MASK),
|
||||
(int) ((value >>> TOPOLOGY_SHIFT) & TOPOLOGY_MASK),
|
||||
(int) ((value >>> YEAR_SHIFT) & YEAR_MASK),
|
||||
(int) ((value >>> SETS_SHIFT) & SETS_MASK),
|
||||
(int) ((value >>> QUALITY_SHIFT) & QUALITY_MASK),
|
||||
(byte) (value & 0xFF)
|
||||
);
|
||||
}
|
||||
|
||||
public static boolean hasFlags(long encoded, long metadataBitMask) {
|
||||
return ((encoded & 0xFF) & metadataBitMask) == metadataBitMask;
|
||||
}
|
||||
|
||||
public long encode() {
|
||||
long ret = 0;
|
||||
ret |= Byte.toUnsignedLong(flags);
|
||||
ret |= min(QUALITY_MASK, max(0, quality)) << QUALITY_SHIFT;
|
||||
ret |= min(SETS_MASK, max(0, sets)) << SETS_SHIFT;
|
||||
ret |= min(YEAR_MASK, max(0, year)) << YEAR_SHIFT;
|
||||
ret |= min(TOPOLOGY_MASK, max(0, topology)) << TOPOLOGY_SHIFT;
|
||||
ret |= min(ENC_DOMAIN_SIZE_MASK, max(0, encDomainSize)) << ENC_DOMAIN_SIZE_SHIFT;
|
||||
ret |= min(RANK_MASK, max(0, rank)) << RANK_SHIFT;
|
||||
ret |= min(ASL_MASK, max(0, avgSentLength)) << ASL_SHIFT;
|
||||
return ret;
|
||||
}
|
||||
|
||||
public boolean isEmpty() {
|
||||
return avgSentLength == 0 && encDomainSize == 0 && topology == 0 && sets == 0 && quality == 0 && year == 0 && flags == 0 && rank == 0;
|
||||
}
|
||||
|
||||
public static int decodeQuality(long encoded) {
|
||||
return (int) ((encoded >>> QUALITY_SHIFT) & QUALITY_MASK);
|
||||
}
|
||||
|
||||
public static int decodeTopology(long encoded) {
|
||||
return (int) ((encoded >>> TOPOLOGY_SHIFT) & TOPOLOGY_MASK);
|
||||
}
|
||||
|
||||
public static int decodeAvgSentenceLength(long encoded) {
|
||||
return (int) ((encoded >>> ASL_SHIFT) & ASL_MASK);
|
||||
}
|
||||
|
||||
public static int decodeYear(long encoded) {
|
||||
return PubDate.fromYearByte((int) ((encoded >>> YEAR_SHIFT) & YEAR_MASK));
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return ENC_DOMAIN_SIZE_MULTIPLIER * encDomainSize;
|
||||
}
|
||||
|
||||
public static int decodeSize(long encoded) {
|
||||
return ENC_DOMAIN_SIZE_MULTIPLIER * (int) ((encoded >>> ENC_DOMAIN_SIZE_SHIFT) & ENC_DOMAIN_SIZE_MASK);
|
||||
}
|
||||
|
||||
public static int decodeRank(long encoded) {
|
||||
return (int) ((encoded >>> RANK_SHIFT) & RANK_MASK);
|
||||
}
|
||||
|
||||
public static long encodeRank(long encoded, int rank) {
|
||||
return encoded | min(RANK_MASK, max(0, rank)) << RANK_SHIFT;
|
||||
}
|
||||
|
||||
public EnumSet<DocumentFlags> flagSet() {
|
||||
return DocumentFlags.decode(flags);
|
||||
}
|
||||
|
||||
}
|
@@ -0,0 +1,73 @@
|
||||
package nu.marginalia.model.idx;
|
||||
|
||||
|
||||
import java.util.EnumSet;
|
||||
|
||||
/**
 * Word-level flags, each mapped to one bit of a byte by its declaration order.
 * Do not reorder the constants.
 * <p>
 * NOTE(review): the encoded form is a single byte, which has room for eight flags.
 * {@link #ExternalLink} is the ninth constant, so its {@link #asBit()} truncates to 0:
 * it is never written by {@link #encode}, never reported by {@link #isPresent} or
 * {@link #decode}, and {@link #isAbsent} is always true for it. Representing it
 * would need a wider encoding.
 */
public enum WordFlags {
    /** Word appears in title */
    Title,

    /** Word appears to be the subject in several sentences */
    Subjects,

    /** Word is a likely named object.  This is a weaker version of Subjects. */
    NamesWords,

    /** The word isn't actually a word on page, but a fake keyword from the code
     * to aid discovery
     */
    Synthetic,

    /** Word is important to site */
    Site,

    /** Word is important to adjacent documents */
    SiteAdjacent,

    /** Keyword appears in URL path */
    UrlPath,

    /** Keyword appears in domain name */
    UrlDomain,

    /** Word appears in an external link */
    ExternalLink
    ;

    /**
     * @return {@code 1 << ordinal()} narrowed to a byte. For {@link #UrlDomain} this is
     *         the sign bit, i.e. a negative byte value.
     */
    public byte asBit() {
        return (byte) (1 << ordinal());
    }

    /**
     * @param value encoded flags
     * @return true if this flag's bit is set in {@code value}
     */
    public boolean isPresent(byte value) {
        // Compare against zero rather than "> 0": for the sign bit both operands are
        // negative bytes, so the AND is negative and a "> 0" test would never succeed.
        return (asBit() & value) != 0;
    }

    /**
     * @param value encoded flags
     * @return true if this flag's bit is not set in {@code value}
     */
    public boolean isAbsent(byte value) {
        return (asBit() & value) == 0;
    }

    /** ORs the bits of all given flags together into one byte. */
    public static byte encode(EnumSet<WordFlags> flags) {
        byte ret = 0;
        for (WordFlags f : flags) {
            ret |= f.asBit();
        }
        return ret;
    }

    /** Expands an encoded byte into the set of flags whose bit is set. */
    public static EnumSet<WordFlags> decode(byte encodedValue) {
        EnumSet<WordFlags> ret = EnumSet.noneOf(WordFlags.class);

        for (WordFlags f : values()) {
            if (f.isPresent(encodedValue)) {
                ret.add(f);
            }
        }

        return ret;
    }

}
|
93
code/common/model/java/nu/marginalia/util/QueryParams.java
Normal file
93
code/common/model/java/nu/marginalia/util/QueryParams.java
Normal file
@@ -0,0 +1,93 @@
|
||||
package nu.marginalia.util;
|
||||
|
||||
import org.apache.commons.lang3.StringUtils;
|
||||
|
||||
import javax.annotation.Nullable;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import java.util.StringJoiner;
|
||||
|
||||
public class QueryParams {
|
||||
|
||||
@Nullable
|
||||
public static String queryParamsSanitizer(String path, @Nullable String queryParams) {
|
||||
if (queryParams == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
String ret;
|
||||
if (queryParams.indexOf('&') >= 0) {
|
||||
|
||||
List<String> parts = new ArrayList<>();
|
||||
for (var part : StringUtils.split(queryParams, '&')) {
|
||||
if (QueryParams.isPermittedParam(path, part)) {
|
||||
parts.add(part);
|
||||
}
|
||||
}
|
||||
if (parts.size() > 1) {
|
||||
parts.sort(Comparator.naturalOrder());
|
||||
}
|
||||
StringJoiner retJoiner = new StringJoiner("&");
|
||||
parts.forEach(retJoiner::add);
|
||||
ret = retJoiner.toString();
|
||||
}
|
||||
else if (isPermittedParam(path, queryParams)) {
|
||||
ret = queryParams;
|
||||
}
|
||||
else {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (ret.isBlank())
|
||||
return null;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
public static boolean isPermittedParam(String path, String param) {
|
||||
if (path.endsWith(".cgi")) return true;
|
||||
|
||||
if (path.endsWith("/posting.php")) return false;
|
||||
|
||||
if (param.startsWith("id=")) return true;
|
||||
if (param.startsWith("p=")) {
|
||||
// Don't retain forum links with post-id:s, they're always non-canonical and eat up a lot of
|
||||
// crawling bandwidth
|
||||
|
||||
if (path.endsWith("showthread.php") || path.endsWith("viewtopic.php")) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
if (param.startsWith("f=")) {
|
||||
if (path.endsWith("showthread.php") || path.endsWith("viewtopic.php")) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
if (param.startsWith("i=")) return true;
|
||||
if (param.startsWith("start=")) return true;
|
||||
if (param.startsWith("t=")) return true;
|
||||
if (param.startsWith("v=")) return true;
|
||||
|
||||
if (param.startsWith("post=")) return true;
|
||||
|
||||
if (path.endsWith("index.php")) {
|
||||
if (param.startsWith("showtopic="))
|
||||
return true;
|
||||
if (param.startsWith("showforum="))
|
||||
return true;
|
||||
}
|
||||
|
||||
if (path.endsWith("StoryView.py")) { // folklore.org is neat
|
||||
return param.startsWith("project=") || param.startsWith("story=");
|
||||
}
|
||||
|
||||
// www.perseus.tufts.edu:
|
||||
if (param.startsWith("collection=")) return true;
|
||||
if (param.startsWith("doc=")) return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
11
code/common/model/readme.md
Normal file
11
code/common/model/readme.md
Normal file
@@ -0,0 +1,11 @@
|
||||
# Model
|
||||
|
||||
This package contains models that are shared across the search engine.
|
||||
|
||||
## Central Classes
|
||||
|
||||
* [EdgeDomain](java/nu/marginalia/model/EdgeDomain.java)
|
||||
* [EdgeUrl](java/nu/marginalia/model/EdgeUrl.java)
|
||||
* [DocumentMetadata](java/nu/marginalia/model/idx/DocumentMetadata.java)
|
||||
* [DocumentFlags](java/nu/marginalia/model/idx/DocumentFlags.java)
|
||||
* [WordFlags](java/nu/marginalia/model/idx/WordFlags.java)
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user