e621 accepts sources from almost anywhere, but which sites generate the most sources for e621? It is not hard to run a query and sort by URL, but we want something a little more fine-tuned than that. I would have liked to use the built-in tools for URL manipulation, but they cannot discard subdomains; that is, they cannot tell that www.site.com and site.com are the same site. Because of that, I used parse-domain to extract the domain and TLD of each source's hostname.
A full list of the sources can be found here.
import { parseDomain } from 'parse-domain';
import fs from 'fs';
import { URL } from 'url';
import db from './../../_code/database.mjs';
import { file_here } from './../../_code/file.mjs'
import { PieChart } from 'd3-charts';
/**
 * Reduces a source URL to "<domain>.<top level domains>", discarding any
 * subdomains (so that www.site.com and site.com are reported as one site).
 *
 * @param {string} text - The source URL to shorten.
 * @returns {string|undefined} The shortened host, or undefined when the URL
 *   cannot be parsed or parse-domain reports no domain for its hostname.
 */
function url_to_short_url (text) {
  try {
    const { hostname } = new URL(text);
    const { domain, topLevelDomains } = parseDomain(hostname);

    // parse-domain yields no domain for hostnames it cannot split
    // (for example IP addresses); there is nothing to report for those.
    if (domain == null) {
      return undefined;
    }

    // Join the labels in the order parse-domain returned them. Sorting them
    // would scramble multi-label suffixes ("com.au" would become "au.com").
    const suffix = topLevelDomains.join('.');

    // TODO currently a bug in parse domain or something.
    // I'm too lazy to fix this right now.
    if (domain.includes('d3gz42uwgl1r1y')) {
      return suffix;
    }

    return `${domain}.${suffix}`;
  } catch (e) {
    // Best effort: sources that are not valid URLs are simply skipped.
    return undefined;
  }
}
/**
 * Counts how many times each item occurs in an array.
 *
 * Items are used as string keys. Counting happens in a Map so that items
 * which share a name with an inherited Object property ("constructor",
 * "toString", "__proto__", ...) are tallied like any other key instead of
 * colliding with the prototype.
 *
 * @param {Iterable<*>} array - The items to count.
 * @returns {Object<string, number>} A plain object mapping each distinct item
 *   to its number of occurrences.
 */
function count_array_into_object (array) {
  const counts = new Map();
  for (const item of array) {
    const key = String(item);
    counts.set(key, (counts.get(key) ?? 0) + 1);
  }
  // Object.fromEntries defines own data properties, so even "__proto__"
  // ends up as a regular counted key.
  return Object.fromEntries(counts);
}
// Shorten every stored source to its site name, skipping any source for
// which url_to_short_url produced nothing usable.
const all_urls = [];
for (const row of db.prepare('select source from posts_sources').all()) {
  const short_url = url_to_short_url(row.source);
  if (short_url) {
    all_urls.push(short_url);
  }
}

// Number of sources per site name.
const url_count = count_array_into_object(all_urls);
// Turn the tally into { key, count } records ordered by count (highest
// first); sites with equal counts are ordered by name via localeCompare.
const ranked_urls = Object.entries(url_count)
  .map(([key, count]) => ({ key, count }))
  .sort((first, second) => {
    const by_count = second.count - first.count; // larger counts first
    return by_count !== 0
      ? by_count
      : first.key.localeCompare(second.key); // tie-break: name ascending
  });
// Draw the ranking as a pie chart and save it to source_popularity.svg,
// at the path file_here derives from this module's URL.
const output_path = file_here(import.meta.url, 'source_popularity.svg');
const chart_title = `Popularity of Sources - ${db.most_recent_date}`;
const pie_chart = new PieChart({
  rotate_labels: true,
  text_distance_scale: 0.4,
  title: { text: chart_title }
});
pie_chart.draw(ranked_urls).save(output_path);
/**
 * Escapes the characters that are significant in HTML text and attribute
 * values so that a value is always rendered as literal text.
 *
 * @param {*} value - The value to embed in HTML; it is converted to a string.
 * @returns {string} The escaped text.
 */
function escape_html (value) {
  return String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

// Write the full ranking as a standalone HTML table.
const table_path = file_here(import.meta.url, 'source_table.generated.html');

// Site names originate from user-submitted source URLs, so they are escaped
// before being placed into markup.
const table_rows = ranked_urls
  .map(({ key, count }) => `<tr><td>${escape_html(key)}</td><td>${escape_html(count)}</td></tr>`)
  .join('\n');

const table = `
<link href="/static/default_style.css" rel="stylesheet" type="text/css">
<table>
<thead>
<tr><th>Site</th><th>Number of Sources</th></tr>
</thead>
<tbody>
${table_rows}
</tbody>
</table>
`;
fs.writeFileSync(table_path, table, 'utf8');