76 lines
2.4 KiB
Python
76 lines
2.4 KiB
Python
"""
|
|
Celery tasks for OSINT crawling.
|
|
"""
|
|
import logging
from datetime import timedelta

from celery import shared_task
from django.core.management import call_command
from django.db import transaction
from django.utils import timezone

from .models import SeedWebsite, AutoGeneratedReport
|
|
|
|
|
|
@shared_task
def crawl_osint_seeds():
    """Periodic task that crawls all due seed websites.

    Runs the ``crawl_osint`` management command with ``--all``. This should
    be scheduled to run periodically (e.g., every hour).

    Returns:
        A human-readable status string. A failure is reported in the returned
        string rather than re-raised, so the task itself still finishes
        normally; the traceback is written to the module logger.
    """
    try:
        call_command('crawl_osint', '--all', verbosity=0)
    except Exception as e:
        # Deliberate best-effort boundary: callers get a status string, but
        # the traceback is logged so the failure is not silently lost.
        logging.getLogger(__name__).exception("OSINT crawling failed")
        return f"OSINT crawling failed: {str(e)}"
    return "OSINT crawling completed successfully"
|
|
|
|
|
|
@shared_task
def crawl_specific_seed(seed_id):
    """Crawl a specific seed website.

    Runs the ``crawl_osint`` management command with ``--seed-id <seed_id>``.

    Args:
        seed_id: Identifier of the seed website; it is converted with
            ``str()`` before being passed to the command.

    Returns:
        A human-readable status string. A failure is reported in the returned
        string rather than re-raised, so the task itself still finishes
        normally; the traceback is written to the module logger.
    """
    try:
        call_command('crawl_osint', '--seed-id', str(seed_id), verbosity=0)
    except Exception as e:
        # Deliberate best-effort boundary: callers get a status string, but
        # the traceback is logged so the failure is not silently lost.
        logging.getLogger(__name__).exception(
            "Seed website %s crawling failed", seed_id
        )
        return f"Seed website {seed_id} crawling failed: {str(e)}"
    return f"Seed website {seed_id} crawled successfully"
|
|
|
|
|
|
@shared_task
def auto_approve_high_confidence_reports(min_confidence=80):
    """Publish pending auto-generated reports that qualify for auto-approval.

    A report qualifies when its ``status`` is ``'pending'``, its
    ``confidence_score`` is at least ``min_confidence``, and at least one of
    its matched keywords has a truthy ``auto_approve`` attribute. For every
    qualifying report a verified, public, anonymous ``ScamReport`` flagged as
    auto-discovered is created, and the auto-generated report is linked to
    it, marked ``'published'`` and timestamped.

    Args:
        min_confidence: Inclusive lower bound on ``confidence_score``.
            Defaults to 80, the threshold that was previously hard-coded.

    Returns:
        A status string of the form ``"Auto-approved <n> reports"``.
    """
    # Kept as a function-level import, as in the original
    # (NOTE(review): presumably avoids a circular import; confirm).
    from reports.models import ScamReport

    # Get auto-reports that are candidates for auto-approval.
    candidates = AutoGeneratedReport.objects.filter(
        status='pending',
        confidence_score__gte=min_confidence,
    ).prefetch_related('matched_keywords')

    approved_count = 0
    for auto_report in candidates:
        # Skip reports where no matched keyword has auto_approve enabled.
        if not any(kw.auto_approve for kw in auto_report.matched_keywords.all()):
            continue

        # Create the public report and flip the auto-report's status as one
        # unit: if the save fails, the new ScamReport is rolled back instead
        # of being left orphaned while the auto-report stays 'pending' (which
        # would publish a duplicate on the next run).
        with transaction.atomic():
            report = ScamReport.objects.create(
                title=auto_report.title,
                description=auto_report.description,
                reported_url=auto_report.source_url,
                scam_type='other',
                status='verified',
                verification_score=auto_report.confidence_score,
                is_public=True,
                is_anonymous=True,
                is_auto_discovered=True,  # Mark as auto-discovered
            )

            auto_report.report = report
            auto_report.status = 'published'
            auto_report.published_at = timezone.now()
            auto_report.save()

        approved_count += 1

    return f"Auto-approved {approved_count} reports"
|
|
|