crawler: add crawler

This commit is contained in:
TheGeneralist 2025-08-08 13:04:17 +02:00
parent 2ccb7bc260
commit c180f98284
Signed by: thegeneralist01
SSH key fingerprint: SHA256:pp9qddbCNmVNoSjevdvQvM5z0DHN7LTa8qBMbcMq/R4
10 changed files with 122 additions and 34 deletions

View file

@ -23,6 +23,9 @@ in {
"archive.${domain}" = {
group = "acme";
};
"crawler.${domain}" = {
group = "acme";
};
};
acceptTerms = true;

View file

@ -0,0 +1,34 @@
# Container services for the archive host: ArchiveBox and pywb, run through
# the NixOS oci-containers module on Podman. Both are published on loopback
# only; external access is expected to go through a reverse proxy.
{ ... }:
{
  # Podman replaces the Docker daemon. `dockerCompat` already provides a
  # `docker` command that forwards to podman, so pkgs.docker is deliberately
  # not installed next to it (both would ship bin/docker).
  virtualisation.podman = {
    enable = true;
    dockerCompat = true;
  };

  # Pin the backend explicitly instead of relying on the stateVersion-dependent
  # default, so the containers below are always started by podman.
  virtualisation.oci-containers.backend = "podman";

  virtualisation.oci-containers.containers = {
    archivebox = {
      image = "ghcr.io/archivebox/archivebox:main";
      ports = [ "127.0.0.1:8000:8000" ];
      volumes = [
        "/mnt/usb/services/archivebox/data:/data"
      ];
      environment = {
        ALLOWLIST_HOSTS = "localhost";
        CSRF_TRUSTED_ORIGINS = "https://archive.thegeneralist01.com,127.0.0.1:8000";
        REVERSE_PROXY_USER_HEADER = "X-Remote-User";
        REVERSE_PROXY_WHITELIST = "127.0.0.1/32,100.86.129.23/32";
      };
    };

    pywb = {
      image = "docker.io/webrecorder/pywb";
      # Host port 8001 stays unchanged; the stock pywb image serves HTTP on
      # container port 8080, so that is the port that has to be published.
      ports = [ "127.0.0.1:8001:8080" ];
      volumes = [
        # Only the collections directory is mounted. A bind mount with "/" as
        # the container destination is rejected by Docker and would otherwise
        # hide the image's own filesystem.
        "/mnt/usb/services/browsertrix/webrecorder/webarchive:/webarchive"
      ];
    };
  };
}

View file

@ -0,0 +1,41 @@
# nginx virtual host for crawler.thegeneralist01.com. It listens only on
# 100.86.129.23 and reverse-proxies every request to the service published
# on 127.0.0.1:8001.
let
  acmeDomain = "thegeneralist01.com";
  domain = "crawler.${acmeDomain}";
  # TLS settings shared by the virtual host; the certificate is looked up
  # under the ACME host named after the full crawler domain.
  ssl = {
    forceSSL = true;
    quic = true;
    useACMEHost = domain;
  };
in
{
  services.nginx.virtualHosts.${domain} = ssl // {
    listen = [
      {
        addr = "100.86.129.23";
        port = 443;
        ssl = true;
      }
      {
        addr = "100.86.129.23";
        port = 80;
      }
    ];
    locations."/" = {
      proxyPass = "http://127.0.0.1:8001";
      recommendedProxySettings = true;
      # Emits the Upgrade/Connection headers together with
      # `proxy_http_version 1.1`, which WebSocket upgrades require and which
      # the hand-written headers alone did not set.
      proxyWebsockets = true;
      extraConfig = ''
        # Do not buffer the upstream response; pass it on as it arrives.
        proxy_buffering off;
        # Allow long-running requests to the backend (up to one hour).
        proxy_read_timeout 3600s;
        proxy_send_timeout 3600s;
      '';
    };
  };
}

View file

@ -9,6 +9,8 @@ let
};
in
{
imports = [ ./archivebox.nix ./crawler-site.nix ];
services.nginx.virtualHosts.${domain} = ssl // {
listen = [
{

View file

@ -5,7 +5,7 @@
{ config, pkgs, inputs, ... }:
{
imports = [ ./hardware-configuration.nix ./site.nix ./cache ./garage.nix ./archive ];
imports = [ ./hardware-configuration.nix ./site.nix ./cache ./archive ];
age.secrets.password.file = ./password.age;
users.users = {

View file

@ -27,6 +27,20 @@ let
ns IN A 100.86.129.23
@ IN A 100.86.129.23
'';
# Zone file for crawler.thegeneralist01.com, written to the Nix store and
# loaded by the matching CoreDNS server block through `file`.
# Both the zone apex (@) and its name server (ns) point at 100.86.129.23.
# The SOA serial uses the yyyymmddXX scheme noted in the zone text.
# NOTE(review): the NS record line has no explicit owner name and depends on
# leading whitespace to inherit the previous owner (@); indentation is not
# visible in this view — confirm, or write the owner as `@` explicitly.
crawlerZoneFile = pkgs.writeText "crawler.zone" ''
$ORIGIN crawler.thegeneralist01.com.
@ IN SOA ns.crawler.thegeneralist01.com. thegeneralist01.proton.me. (
2025080801 ; serial (yyyymmddXX)
3600 ; refresh
600 ; retry
86400 ; expire
3600 ; minimum
)
IN NS ns.crawler.thegeneralist01.com.
ns IN A 100.86.129.23
@ IN A 100.86.129.23
'';
in
{
services.coredns = {
@ -44,6 +58,12 @@ in
errors
}
crawler.thegeneralist01.com:53 {
file ${crawlerZoneFile}
log
errors
}
.:53 {
forward . 100.100.100.100 45.90.28.181 45.90.30.181
cache

View file

@ -1,18 +0,0 @@
# Docker-based ArchiveBox module (removed by this commit according to the
# hunk header). Enables the Docker daemon, declares a single `archivebox`
# OCI container published on 127.0.0.1:8000 with its data directory under
# /mnt/usb, and installs the docker package system-wide.
{ pkgs, ... }: {
virtualisation.docker.enable = true;
virtualisation.oci-containers.containers.archivebox = {
image = "ghcr.io/archivebox/archivebox:main";
ports = [ "127.0.0.1:8000:8000" ];
volumes = [
"/mnt/usb/services/archivebox/data:/data"
];
environment = {
ALLOWLIST_HOSTS = "localhost";
CSRF_TRUSTED_ORIGINS = "https://archive.thegeneralist01.com,127.0.0.1:8000";
REVERSE_PROXY_USER_HEADER = "X-Remote-User";
REVERSE_PROXY_WHITELIST = "127.0.0.1/32,100.86.129.23/32";
};
};
environment.systemPackages = [ pkgs.docker ];
}

View file

@ -1,4 +1,4 @@
{ lib, ... }:
{ lib, pkgs, ... }:
{
boot.initrd.availableKernelModules = [
@ -35,19 +35,25 @@
}
];
fileSystems."/mnt/usb" = {
device = "/dev/disk/by-uuid/AADEEA03DEE9C7A1";
fsType = "ntfs-3g";
options = [
"rw"
"noatime"
];
};
# fileSystems."/mnt/usb" = {
# device = "/dev/disk/by-uuid/AADEEA03DEE9C7A1";
# fsType = "ntfs-3g";
# options = [
# "rw"
# "noatime"
# ];
# };
#
# Pass autosuspend=-1 to the usbcore kernel module; per the kernel's USB
# power-management documentation a negative value disables autosuspend.
boot.extraModprobeConfig = ''
options usbcore autosuspend=-1
'';
# Make hdparm available on the system PATH for manual use.
environment.systemPackages = [ pkgs.hdparm ];
# When the kernel adds a device named "sda", run hdparm on /dev/sda with
# -B 255 (APM off, per hdparm(8)) and -S 0 (standby timeout off, per hdparm(8)).
# NOTE(review): the rule matches the kernel name "sda", which depends on
# device enumeration order — confirm this is always the intended disk.
services.udev.extraRules = ''
ACTION=="add", KERNEL=="sda", RUN+="${pkgs.hdparm}/bin/hdparm -B 255 -S 0 /dev/sda"
'';
# Enables DHCP on each ethernet and wireless interface. In case of scripted networking
# (the default) this is the recommended approach. When using systemd-networkd it's
# still possible to use this option, but it's recommended to use it in conjunction

View file

@ -22,10 +22,10 @@ in
group = "jellyfin";
user = "jellyfin";
cacheDir = "/mnt/usb/jellyfin/cache";
dataDir = "/mnt/usb/jellyfin/data";
configDir = "/mnt/usb/jellyfin/data/config";
logDir = "/mnt/usb/jellyfin/data/log";
cacheDir = "/mnt/usb/services/jellyfin/cache";
dataDir = "/mnt/usb/services/jellyfin/data/data";
configDir = "/mnt/usb/services/jellyfin/data/config";
logDir = "/mnt/usb/services/jellyfin/data/log";
};
services.nginx.virtualHosts.${domain} = ssl // {

View file

@ -35,7 +35,7 @@ return {
-- :get_install_path() .. "/node_modules/@vue/language-server" .. "/node_modules/@vue/typescript-plugin"
local capabilities = require("blink.cmp").get_lsp_capabilities()
vim.lsp.enable("nil_ls")
vim.lsp.enable("nixd")
require("mason-lspconfig").setup({
automatic_enable = true,
ensure_installed = {