add phash repost detection

This commit is contained in:
2026-06-13 19:09:36 +02:00
parent 4c742aaf66
commit 1b8860d8ff
4 changed files with 67 additions and 5 deletions

View File

@@ -209,7 +209,8 @@ export default new class queue {
const results = await db` const results = await db`
SELECT id FROM items SELECT id FROM items
WHERE phash IS NOT NULL AND phash != '' AND phash != 'ERROR' AND phash != 'MISSING' AND phash NOT LIKE '00000000%' WHERE is_deleted = false
AND phash IS NOT NULL AND phash != '' AND phash != 'ERROR' AND phash != 'MISSING' AND phash NOT LIKE '00000000%'
AND ( AND (
( (
CASE WHEN split_part(phash, '_', 1) != '' AND ${h1} != '' THEN CASE WHEN split_part(phash, '_', 1) != '' AND ${h1} != '' THEN
@@ -237,6 +238,40 @@ export default new class queue {
return results.length > 0 ? results[0].id : false; return results.length > 0 ? results[0].id : false;
}; };
async findallrepostphash(newHash, excludeId = null) {
if (!newHash) return [];
const newHashes = newHash.split('_').filter(s => s && !s.startsWith('00000000'));
if (newHashes.length === 0) return [];
const h1 = newHashes[0] || '';
const h2 = newHashes[1] || '';
const h3 = newHashes[2] || '';
const results = await db`
SELECT id, username, stamp FROM items
WHERE is_deleted = false
AND phash IS NOT NULL AND phash != '' AND phash != 'ERROR' AND phash != 'MISSING' AND phash NOT LIKE '00000000%'
${excludeId ? db`AND id != ${excludeId}` : db``}
AND (
CASE WHEN split_part(phash, '_', 1) != '' AND ${h1} != '' THEN
bit_count(('x' || split_part(phash, '_', 1))::bit(1024) # ('x' || ${h1})::bit(1024)) <= 15
ELSE false END::int
+
CASE WHEN split_part(phash, '_', 2) != '' AND ${h2} != '' THEN
bit_count(('x' || split_part(phash, '_', 2))::bit(1024) # ('x' || ${h2})::bit(1024)) <= 15
ELSE false END::int
+
CASE WHEN split_part(phash, '_', 3) != '' AND ${h3} != '' THEN
bit_count(('x' || split_part(phash, '_', 3))::bit(1024) # ('x' || ${h3})::bit(1024)) <= 15
ELSE false END::int
>= 1
)
ORDER BY id ASC
`;
return results.map(r => ({ id: r.id, username: r.username, stamp: r.stamp }));
};
async checkcommentrepostphash(newHash) { async checkcommentrepostphash(newHash) {
if (!newHash) return false; if (!newHash) return false;
const newHashes = newHash.split('_').filter(s => s && !s.startsWith('00000000')); const newHashes = newHash.split('_').filter(s => s && !s.startsWith('00000000'));

View File

@@ -2,6 +2,7 @@ import db from "../sql.mjs";
import lib from "../lib.mjs"; import lib from "../lib.mjs";
import cfg from "../config.mjs"; import cfg from "../config.mjs";
import { updateHallsCache } from "../halls_cache.mjs"; import { updateHallsCache } from "../halls_cache.mjs";
import queue from "../queue.mjs";
import fs from "fs"; import fs from "fs";
import url from "url"; import url from "url";
@@ -703,7 +704,7 @@ export default {
AND (checksum = ${baseChecksum} OR checksum LIKE ${baseChecksum + '_bypass_%'}) AND (checksum = ${baseChecksum} OR checksum LIKE ${baseChecksum + '_bypass_%'})
ORDER BY id ASC ORDER BY id ASC
`; `;
repostItems = repostRows.map(r => ({ id: r.id, username: r.username, stamp: r.stamp })); repostItems = repostRows.map(r => ({ id: r.id, username: r.username, stamp: r.stamp, match_type: 'checksum' }));
} else if (actitem.checksum) { } else if (actitem.checksum) {
// Even without bypass, check if other bypass-entries exist with this same hash // Even without bypass, check if other bypass-entries exist with this same hash
const baseChecksum = actitem.checksum; const baseChecksum = actitem.checksum;
@@ -714,9 +715,27 @@ export default {
AND checksum LIKE ${baseChecksum + '_bypass_%'} AND checksum LIKE ${baseChecksum + '_bypass_%'}
ORDER BY id ASC ORDER BY id ASC
`; `;
repostItems = repostRows.map(r => ({ id: r.id, username: r.username, stamp: r.stamp })); repostItems = repostRows.map(r => ({ id: r.id, username: r.username, stamp: r.stamp, match_type: 'checksum' }));
} }
// Also find visually-similar items via phash, merging with checksum results
if (actitem.phash && actitem.phash !== 'ERROR' && actitem.phash !== 'MISSING') {
try {
const phashMatches = await queue.findallrepostphash(actitem.phash, itemid);
const existingIds = new Set(repostItems.map(r => r.id));
for (const pm of phashMatches) {
if (!existingIds.has(pm.id)) {
repostItems.push({ id: pm.id, username: pm.username, stamp: pm.stamp, match_type: 'phash' });
existingIds.add(pm.id);
}
}
repostItems.sort((a, b) => a.id - b.id);
} catch (e) {
console.error('[GETF0CK] phash repost lookup failed:', e.message);
}
}
// Efficient coverart fallback // Efficient coverart fallback
const coverartUrl = actitem.has_coverart const coverartUrl = actitem.has_coverart
? `${cfg.websrv.paths.coverarts}/${actitem.id}.webp` ? `${cfg.websrv.paths.coverarts}/${actitem.id}.webp`

View File

@@ -260,7 +260,11 @@
<th>Repost</th> <th>Repost</th>
<td> <td>
@each(item.reposts as rp) @each(item.reposts as rp)
<a href="/{{ rp.id }}" style="margin-right: 4px;">#{{ rp.id }}</a> @if(rp.match_type === 'phash')
<a href="/{{ rp.id }}" style="margin-right: 4px; opacity: 0.75;" tooltip="Visually similar (perceptual hash)" flow="up">~#{{ rp.id }}</a>
@else
<a href="/{{ rp.id }}" style="margin-right: 4px;" tooltip="Exact duplicate (checksum)" flow="up">#{{ rp.id }}</a>
@endif
@endeach @endeach
</td> </td>
</tr> </tr>

View File

@@ -219,7 +219,11 @@
<th>Repost</th> <th>Repost</th>
<td> <td>
@each(item.reposts as rp) @each(item.reposts as rp)
<a href="/{{ rp.id }}" style="margin-right: 4px;">#{{ rp.id }}</a> @if(rp.match_type === 'phash')
<a href="/{{ rp.id }}" style="margin-right: 4px; opacity: 0.75;" tooltip="Visually similar (perceptual hash)" flow="up">~#{{ rp.id }}</a>
@else
<a href="/{{ rp.id }}" style="margin-right: 4px;" tooltip="Exact duplicate (checksum)" flow="up">#{{ rp.id }}</a>
@endif
@endeach @endeach
</td> </td>
</tr> </tr>