This commit is contained in:
Iliyan Angelov
2025-11-26 22:32:20 +02:00
commit ed94dd22dd
150 changed files with 14058 additions and 0 deletions

View File

@@ -0,0 +1,80 @@
# Generated by Django 5.2.8 on 2025-11-26 13:41
import django.db.models.deletion
from django.db import migrations, models
class Migration(migrations.Migration):
    """Initial schema migration for the ``osint`` app.

    Creates three models: ``OSINTConfiguration``, ``OSINTResult`` and
    ``OSINTTask``. The latter two each carry a cascading foreign key to
    ``reports.scamreport``.
    """

    initial = True

    # The foreign keys below target a model in the ``reports`` app.
    dependencies = [
        ('reports', '0001_initial'),
    ]

    operations = [
        # Per-service settings; ``service_name`` is unique.
        migrations.CreateModel(
            name='OSINTConfiguration',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                (
                    'service_name',
                    models.CharField(max_length=100, unique=True),
                ),
                (
                    'api_key',
                    models.CharField(
                        blank=True,
                        help_text='Encrypted API key',
                        max_length=255,
                    ),
                ),
                (
                    'api_url',
                    models.URLField(blank=True),
                ),
                (
                    'is_active',
                    models.BooleanField(default=True),
                ),
                (
                    'rate_limit',
                    models.IntegerField(
                        default=100,
                        help_text='Requests per hour',
                    ),
                ),
                (
                    'configuration',
                    models.JSONField(
                        blank=True,
                        default=dict,
                        help_text='Additional configuration',
                    ),
                ),
                (
                    'created_at',
                    models.DateTimeField(auto_now_add=True),
                ),
                (
                    'updated_at',
                    models.DateTimeField(auto_now=True),
                ),
            ],
            options={
                'verbose_name': 'OSINT Configuration',
                'verbose_name_plural': 'OSINT Configurations',
                'db_table': 'osint_osintconfiguration',
            },
        ),
        # One collected data record attached to a report.
        migrations.CreateModel(
            name='OSINTResult',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                (
                    'source',
                    models.CharField(
                        help_text='OSINT source/service name',
                        max_length=100,
                    ),
                ),
                (
                    'data_type',
                    models.CharField(
                        choices=[
                            ('whois', 'WHOIS Data'),
                            ('dns', 'DNS Records'),
                            ('ssl', 'SSL Certificate'),
                            ('archive', 'Archive Data'),
                            ('email', 'Email Data'),
                            ('phone', 'Phone Data'),
                            ('business', 'Business Registry Data'),
                            ('social', 'Social Media Data'),
                            ('reputation', 'Reputation Data'),
                        ],
                        max_length=50,
                    ),
                ),
                (
                    'raw_data',
                    models.JSONField(
                        default=dict,
                        help_text='Raw data from OSINT source',
                    ),
                ),
                (
                    'processed_data',
                    models.JSONField(
                        blank=True,
                        default=dict,
                        help_text='Processed/cleaned data',
                    ),
                ),
                (
                    'confidence_level',
                    models.IntegerField(
                        default=0,
                        help_text='Confidence level (0-100)',
                    ),
                ),
                (
                    'is_verified',
                    models.BooleanField(
                        default=False,
                        help_text='Manually verified by moderator',
                    ),
                ),
                (
                    'collected_at',
                    models.DateTimeField(auto_now_add=True),
                ),
                (
                    'updated_at',
                    models.DateTimeField(auto_now=True),
                ),
                (
                    'report',
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name='osint_results',
                        to='reports.scamreport',
                    ),
                ),
            ],
            options={
                'verbose_name': 'OSINT Result',
                'verbose_name_plural': 'OSINT Results',
                'db_table': 'osint_osintresult',
                'ordering': ['-collected_at'],
                # Composite indexes on (report, data_type) and
                # (confidence_level, is_verified).
                'indexes': [
                    models.Index(
                        fields=['report', 'data_type'],
                        name='osint_osint_report__4a95b0_idx',
                    ),
                    models.Index(
                        fields=['confidence_level', 'is_verified'],
                        name='osint_osint_confide_47552d_idx',
                    ),
                ],
            },
        ),
        # A unit of work for a report, with status and timing columns.
        migrations.CreateModel(
            name='OSINTTask',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                (
                    'task_type',
                    models.CharField(
                        choices=[
                            ('domain_analysis', 'Domain Analysis'),
                            ('url_analysis', 'URL Analysis'),
                            ('email_analysis', 'Email Analysis'),
                            ('phone_analysis', 'Phone Analysis'),
                            ('whois_lookup', 'WHOIS Lookup'),
                            ('dns_lookup', 'DNS Lookup'),
                            ('ssl_check', 'SSL Certificate Check'),
                            ('archive_check', 'Archive Check'),
                            ('business_registry', 'Business Registry Check'),
                            ('social_media', 'Social Media Check'),
                        ],
                        max_length=50,
                    ),
                ),
                (
                    'status',
                    models.CharField(
                        choices=[
                            ('pending', 'Pending'),
                            ('running', 'Running'),
                            ('completed', 'Completed'),
                            ('failed', 'Failed'),
                            ('cancelled', 'Cancelled'),
                        ],
                        default='pending',
                        max_length=20,
                    ),
                ),
                (
                    'parameters',
                    models.JSONField(
                        default=dict,
                        help_text='Task parameters (e.g., URL, email, phone)',
                    ),
                ),
                (
                    'result',
                    models.JSONField(
                        blank=True,
                        default=dict,
                        help_text='Task result data',
                    ),
                ),
                (
                    'error_message',
                    models.TextField(blank=True),
                ),
                (
                    'created_at',
                    models.DateTimeField(auto_now_add=True),
                ),
                (
                    'started_at',
                    models.DateTimeField(blank=True, null=True),
                ),
                (
                    'completed_at',
                    models.DateTimeField(blank=True, null=True),
                ),
                (
                    'retry_count',
                    models.IntegerField(default=0),
                ),
                (
                    'report',
                    models.ForeignKey(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name='osint_tasks',
                        to='reports.scamreport',
                    ),
                ),
            ],
            options={
                'verbose_name': 'OSINT Task',
                'verbose_name_plural': 'OSINT Tasks',
                'db_table': 'osint_osinttask',
                'ordering': ['-created_at'],
                # Composite indexes on (status, created_at) and
                # (report, task_type).
                'indexes': [
                    models.Index(
                        fields=['status', 'created_at'],
                        name='osint_osint_status_290802_idx',
                    ),
                    models.Index(
                        fields=['report', 'task_type'],
                        name='osint_osint_report__e7bd16_idx',
                    ),
                ],
            },
        ),
    ]

View File

@@ -0,0 +1,157 @@
# Generated by Django 5.2.8 on 2025-11-26 18:03
import django.db.models.deletion
from django.conf import settings
from django.db import migrations, models
class Migration(migrations.Migration):
    """Second ``osint`` migration: keyword, crawl and auto-report models.

    Creates ``OSINTKeyword``, ``CrawledContent``, ``AutoGeneratedReport``
    and ``SeedWebsite``, links ``CrawledContent`` to ``SeedWebsite``, then
    adds the indexes and the ``(url, content_hash)`` uniqueness constraint.
    """

    dependencies = [
        ('osint', '0001_initial'),
        ('reports', '0002_scamreport_is_auto_discovered'),
        # Several models below reference the configured user model.
        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
    ]

    operations = [
        # A search term or pattern, with matching options and a default score.
        migrations.CreateModel(
            name='OSINTKeyword',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                (
                    'keyword',
                    models.CharField(
                        help_text='Keyword, phrase, or regex pattern to search for',
                        max_length=500,
                    ),
                ),
                (
                    'name',
                    models.CharField(
                        help_text='Friendly name for this keyword',
                        max_length=200,
                    ),
                ),
                (
                    'description',
                    models.TextField(
                        blank=True,
                        help_text='Description of what this keyword detects',
                    ),
                ),
                (
                    'keyword_type',
                    models.CharField(
                        choices=[
                            ('exact', 'Exact Match'),
                            ('regex', 'Regular Expression'),
                            ('phrase', 'Phrase Match'),
                            ('domain', 'Domain Pattern'),
                            ('email', 'Email Pattern'),
                            ('phone', 'Phone Pattern'),
                        ],
                        default='phrase',
                        help_text='Type of matching to perform',
                        max_length=20,
                    ),
                ),
                (
                    'is_active',
                    models.BooleanField(
                        default=True,
                        help_text='Enable/disable this keyword',
                    ),
                ),
                (
                    'case_sensitive',
                    models.BooleanField(
                        default=False,
                        help_text='Case sensitive matching',
                    ),
                ),
                (
                    'confidence_score',
                    models.IntegerField(
                        default=50,
                        help_text='Default confidence score (0-100) when this keyword matches',
                    ),
                ),
                (
                    'auto_approve',
                    models.BooleanField(
                        default=False,
                        help_text='Auto-approve reports matching this keyword (requires high confidence)',
                    ),
                ),
                (
                    'created_at',
                    models.DateTimeField(auto_now_add=True),
                ),
                (
                    'updated_at',
                    models.DateTimeField(auto_now=True),
                ),
                (
                    'created_by',
                    models.ForeignKey(
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        related_name='created_keywords',
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
            ],
            options={
                'verbose_name': 'OSINT Keyword',
                'verbose_name_plural': 'OSINT Keywords',
                'db_table': 'osint_keyword',
                'ordering': ['-is_active', 'name'],
            },
        ),
        # A fetched page. Its ``seed_website`` foreign key is added further
        # down, after ``SeedWebsite`` has been created.
        migrations.CreateModel(
            name='CrawledContent',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                (
                    'url',
                    models.URLField(
                        help_text='URL of the crawled page',
                        max_length=1000,
                    ),
                ),
                (
                    'title',
                    models.CharField(
                        blank=True,
                        help_text='Page title',
                        max_length=500,
                    ),
                ),
                (
                    'content',
                    models.TextField(help_text='Crawled page content'),
                ),
                (
                    'html_content',
                    models.TextField(
                        blank=True,
                        help_text='Raw HTML content',
                    ),
                ),
                (
                    'match_count',
                    models.IntegerField(
                        default=0,
                        help_text='Number of keyword matches found',
                    ),
                ),
                (
                    'confidence_score',
                    models.IntegerField(
                        default=0,
                        help_text='Calculated confidence score based on matches',
                    ),
                ),
                (
                    'has_potential_scam',
                    models.BooleanField(
                        default=False,
                        help_text='Flagged as potential scam based on keyword matches',
                    ),
                ),
                (
                    'crawled_at',
                    models.DateTimeField(auto_now_add=True),
                ),
                (
                    'http_status',
                    models.IntegerField(
                        blank=True,
                        help_text='HTTP status code',
                        null=True,
                    ),
                ),
                (
                    'content_hash',
                    models.CharField(
                        blank=True,
                        help_text='SHA256 hash of content for deduplication',
                        max_length=64,
                    ),
                ),
                (
                    'matched_keywords',
                    models.ManyToManyField(
                        blank=True,
                        help_text='Keywords that matched this content',
                        related_name='matched_contents',
                        to='osint.osintkeyword',
                    ),
                ),
            ],
            options={
                'verbose_name': 'Crawled Content',
                'verbose_name_plural': 'Crawled Contents',
                'db_table': 'osint_crawledcontent',
                'ordering': ['-crawled_at', '-confidence_score'],
            },
        ),
        # A draft report tied one-to-one to a crawled page, with review fields.
        migrations.CreateModel(
            name='AutoGeneratedReport',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                (
                    'title',
                    models.CharField(
                        help_text='Auto-generated report title',
                        max_length=500,
                    ),
                ),
                (
                    'description',
                    models.TextField(help_text='Auto-generated report description'),
                ),
                (
                    'source_url',
                    models.URLField(
                        help_text='Source URL where scam was found',
                        max_length=1000,
                    ),
                ),
                (
                    'confidence_score',
                    models.IntegerField(
                        default=0,
                        help_text='Confidence score (0-100)',
                    ),
                ),
                (
                    'status',
                    models.CharField(
                        choices=[
                            ('pending', 'Pending Review'),
                            ('approved', 'Approved'),
                            ('rejected', 'Rejected'),
                            ('published', 'Published'),
                        ],
                        default='pending',
                        help_text='Review status',
                        max_length=20,
                    ),
                ),
                (
                    'review_notes',
                    models.TextField(
                        blank=True,
                        help_text='Notes from moderator/admin review',
                    ),
                ),
                (
                    'reviewed_at',
                    models.DateTimeField(blank=True, null=True),
                ),
                (
                    'published_at',
                    models.DateTimeField(blank=True, null=True),
                ),
                (
                    'created_at',
                    models.DateTimeField(auto_now_add=True),
                ),
                (
                    'updated_at',
                    models.DateTimeField(auto_now=True),
                ),
                (
                    'report',
                    models.ForeignKey(
                        blank=True,
                        help_text='Linked scam report (created when approved)',
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        related_name='auto_generated_reports',
                        to='reports.scamreport',
                    ),
                ),
                (
                    'reviewed_by',
                    models.ForeignKey(
                        blank=True,
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        related_name='reviewed_auto_reports',
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
                (
                    'crawled_content',
                    models.OneToOneField(
                        on_delete=django.db.models.deletion.CASCADE,
                        related_name='auto_report',
                        to='osint.crawledcontent',
                    ),
                ),
                (
                    'matched_keywords',
                    models.ManyToManyField(
                        related_name='generated_reports',
                        to='osint.osintkeyword',
                    ),
                ),
            ],
            options={
                'verbose_name': 'Auto-Generated Report',
                'verbose_name_plural': 'Auto-Generated Reports',
                'db_table': 'osint_autogeneratedreport',
                'ordering': ['-created_at', '-confidence_score'],
            },
        ),
        # A crawl starting point with scheduling settings and counters.
        migrations.CreateModel(
            name='SeedWebsite',
            fields=[
                ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                (
                    'url',
                    models.URLField(
                        help_text='Base URL to crawl',
                        max_length=500,
                    ),
                ),
                (
                    'name',
                    models.CharField(
                        help_text='Friendly name for this seed website',
                        max_length=200,
                    ),
                ),
                (
                    'description',
                    models.TextField(
                        blank=True,
                        help_text='Description of the website',
                    ),
                ),
                (
                    'is_active',
                    models.BooleanField(
                        default=True,
                        help_text='Enable/disable crawling for this website',
                    ),
                ),
                (
                    'priority',
                    models.CharField(
                        choices=[
                            ('high', 'High'),
                            ('medium', 'Medium'),
                            ('low', 'Low'),
                        ],
                        default='medium',
                        help_text='Crawling priority',
                        max_length=10,
                    ),
                ),
                (
                    'crawl_depth',
                    models.IntegerField(
                        default=2,
                        help_text='Maximum depth to crawl (0 = only this page, 1 = this page + direct links, etc.)',
                    ),
                ),
                (
                    'crawl_interval_hours',
                    models.IntegerField(
                        default=24,
                        help_text='Hours between crawls',
                    ),
                ),
                (
                    'allowed_domains',
                    models.JSONField(
                        blank=True,
                        default=list,
                        help_text='List of allowed domains to crawl (empty = same domain only)',
                    ),
                ),
                (
                    'user_agent',
                    models.CharField(
                        blank=True,
                        default='Mozilla/5.0 (compatible; OSINTBot/1.0)',
                        help_text='User agent string for requests',
                        max_length=255,
                    ),
                ),
                (
                    'last_crawled_at',
                    models.DateTimeField(blank=True, null=True),
                ),
                (
                    'pages_crawled',
                    models.IntegerField(default=0),
                ),
                (
                    'matches_found',
                    models.IntegerField(default=0),
                ),
                (
                    'created_at',
                    models.DateTimeField(auto_now_add=True),
                ),
                (
                    'updated_at',
                    models.DateTimeField(auto_now=True),
                ),
                (
                    'created_by',
                    models.ForeignKey(
                        null=True,
                        on_delete=django.db.models.deletion.SET_NULL,
                        related_name='created_seed_websites',
                        to=settings.AUTH_USER_MODEL,
                    ),
                ),
            ],
            options={
                'verbose_name': 'Seed Website',
                'verbose_name_plural': 'Seed Websites',
                'db_table': 'osint_seedwebsite',
                # NOTE(review): ``priority`` stores the strings
                # 'high'/'medium'/'low', so '-priority' orders by string
                # value rather than by urgency - confirm this is intended.
                'ordering': ['-priority', '-last_crawled_at'],
            },
        ),
        # CrawledContent was created before SeedWebsite in this list, so its
        # foreign key to SeedWebsite is attached here as a separate step.
        migrations.AddField(
            model_name='crawledcontent',
            name='seed_website',
            field=models.ForeignKey(
                on_delete=django.db.models.deletion.CASCADE,
                related_name='crawled_contents',
                to='osint.seedwebsite',
            ),
        ),
        # Index on OSINTKeyword.
        migrations.AddIndex(
            model_name='osintkeyword',
            index=models.Index(
                fields=['is_active', 'keyword_type'],
                name='osint_keywo_is_acti_6f4814_idx',
            ),
        ),
        # Indexes on AutoGeneratedReport.
        migrations.AddIndex(
            model_name='autogeneratedreport',
            index=models.Index(
                fields=['status', 'confidence_score'],
                name='osint_autog_status_a8a215_idx',
            ),
        ),
        migrations.AddIndex(
            model_name='autogeneratedreport',
            index=models.Index(
                fields=['created_at'],
                name='osint_autog_created_07e2b0_idx',
            ),
        ),
        # Indexes on SeedWebsite.
        migrations.AddIndex(
            model_name='seedwebsite',
            index=models.Index(
                fields=['is_active', 'priority'],
                name='osint_seedw_is_acti_411fa2_idx',
            ),
        ),
        migrations.AddIndex(
            model_name='seedwebsite',
            index=models.Index(
                fields=['last_crawled_at'],
                name='osint_seedw_last_cr_673111_idx',
            ),
        ),
        # Indexes on CrawledContent.
        migrations.AddIndex(
            model_name='crawledcontent',
            index=models.Index(
                fields=['seed_website', 'crawled_at'],
                name='osint_crawl_seed_we_eb78f4_idx',
            ),
        ),
        migrations.AddIndex(
            model_name='crawledcontent',
            index=models.Index(
                fields=['has_potential_scam', 'confidence_score'],
                name='osint_crawl_has_pot_9317d0_idx',
            ),
        ),
        migrations.AddIndex(
            model_name='crawledcontent',
            index=models.Index(
                fields=['content_hash'],
                name='osint_crawl_content_17d05a_idx',
            ),
        ),
        # NOTE(review): ``url`` allows up to 1000 characters; some database
        # backends cap index key length - confirm this constraint can be
        # created on the target backend.
        migrations.AlterUniqueTogether(
            name='crawledcontent',
            unique_together={('url', 'content_hash')},
        ),
    ]

View File