fix typos in .gitignore

2026-01-14 23:36:31 +01:00 · 2026-01-14 23:36:31 +01:00 · 6eb3097f3d
commit 6eb3097f3d
parent 35057d7957
6 changed files with 241 additions and 2 deletions
--- a/src/classifiers.rs
+++ b/src/classifiers.rs
@ -0,0 +1,121 @@
+use std::process::Command;
+use serde::{Deserialize, Serialize};
+use anyhow::{Context, Result};
+
+/// Classifies a resource against a hierarchical tag tree by shelling out to the `codex` CLI.
+///
+/// Builds a prompt that embeds `current_tag_tree` and `input`, runs `codex e <prompt>`,
+/// and returns the command's stdout, lossily decoded as UTF-8. The prompt asks for JSON
+/// in the shape that [`ClassificationResult`] deserializes, but the text is returned
+/// unparsed and unvalidated.
+///
+/// # Errors
+///
+/// Returns an error if the `codex` process cannot be started, or if it exits with a
+/// non-success status (the error then carries the exit status and the captured stderr).
+pub fn classify(input: &str, current_tag_tree: String) -> Result<String> {
+    let prompt = format!("You are a resource classifier. Given a hierarchical tag tree and a resource, classify it into 1-3 most specific applicable tags.
+
+# RULES:
+- Each level down = narrower specialization
+- Assign MOST SPECIFIC tags that fit (prefer leaf nodes when appropriate)
+- If no good fit exists, suggest new tag(s) with proposed location in tree
+- Output JSON only
+
+# CURRENT TAG TREE:
+{current_tag_tree}
+
+# RESOURCE INFORMATION:
+{input}
+
+# OUTPUT FORMAT:
+{{
+  \"tags\": [\"path/to/tag1\", \"path/to/tag2\"],
+  \"confidence\": [0.95, 0.87],
+  \"new_tags\": [
+    {{
+      \"name\": \"suggested_tag\",
+      \"parent\": \"path/to/parent\",
+      \"reason\": \"why this tag is needed\"
+    }}
+  ],
+  \"reasoning\": \"brief explanation of classification\"
+}}");
+
+    let out = Command::new("codex")
+        .arg("e")
+        .arg(prompt)
+        .output()
+        .context("Failed to execute codex classification command")?;
+
+    // A failed run can still leave empty or partial stdout behind; report it as an
+    // error instead of handing the caller text that will not parse.
+    if !out.status.success() {
+        anyhow::bail!(
+            "codex classification command exited with {}: {}",
+            out.status,
+            String::from_utf8_lossy(&out.stderr).trim()
+        );
+    }
+
+    Ok(String::from_utf8_lossy(&out.stdout).into_owned())
+}
+
+// Data types mirroring the JSON output format requested in the classifier prompt.
+
+/// Structured form of the JSON object described in the `classify` prompt's
+/// "OUTPUT FORMAT" section.
+#[derive(Debug, Serialize, Deserialize)]
+pub struct ClassificationResult {
+    /// Tag paths assigned to the resource (the prompt shows the form `path/to/tag1`).
+    pub tags: Vec<String>,
+    /// Confidence scores; the methods on this type pair `confidence[i]` with `tags[i]`.
+    pub confidence: Vec<f32>,
+    /// Suggested new tags; a missing `new_tags` field deserializes to an empty list.
+    #[serde(default)]
+    pub new_tags: Vec<NewTagSuggestion>,
+    /// Brief explanation of the classification, as requested in the prompt.
+    pub reasoning: String,
+}
+
+/// One entry of the `new_tags` array: a tag proposed for addition to the tag tree.
+#[derive(Debug, Serialize, Deserialize)]
+pub struct NewTagSuggestion {
+    /// Name of the suggested tag.
+    pub name: String,
+    /// Path of the existing tag the suggestion would be placed under
+    /// (the prompt shows the form `path/to/parent`).
+    pub parent: String,
+    /// Why the tag is needed.
+    pub reason: String,
+}
+
+impl ClassificationResult {
+    /// Parses a classification from the JSON text returned by the LLM.
+    ///
+    /// # Errors
+    ///
+    /// Returns the underlying `serde_json::Error` when `json_str` is not valid JSON
+    /// or does not match the expected fields.
+    pub fn from_json(json_str: &str) -> Result<Self, serde_json::Error> {
+        serde_json::from_str(json_str)
+    }
+
+    /// Returns the tag with the highest confidence, together with that confidence.
+    ///
+    /// Tags and confidences are paired by position; if the two lists differ in
+    /// length, the unpaired tail is ignored. NaN confidences are skipped, so the
+    /// result is `None` when no pair has a comparable confidence. On ties the later
+    /// entry wins.
+    pub fn primary_tag(&self) -> Option<(&str, f32)> {
+        self.tags
+            .iter()
+            .zip(&self.confidence)
+            // NaN has no meaningful rank; skipping it keeps the comparison total and
+            // avoids the panic a `partial_cmp(..).unwrap()` would raise.
+            .filter(|(_, conf)| !conf.is_nan())
+            .max_by(|a, b| a.1.total_cmp(b.1))
+            .map(|(tag, conf)| (tag.as_str(), *conf))
+    }
+
+    /// Returns `true` if any confidence value is at least `threshold`.
+    ///
+    /// Only `confidence` is inspected, so a score without a matching tag still counts.
+    pub fn is_confident(&self, threshold: f32) -> bool {
+        self.confidence.iter().any(|&c| c >= threshold)
+    }
+
+    /// Returns the tags whose paired confidence is at least `threshold`, in their
+    /// original order.
+    ///
+    /// Tags and confidences are paired by position; unpaired entries are ignored.
+    pub fn confident_tags(&self, threshold: f32) -> Vec<&str> {
+        self.tags
+            .iter()
+            .zip(&self.confidence)
+            .filter(|&(_, &conf)| conf >= threshold)
+            .map(|(tag, _)| tag.as_str())
+            .collect()
+    }
+}
+
+// Unit tests; they double as a usage example for `ClassificationResult`.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Parses a representative LLM response and checks both the raw fields and the
+    /// helper methods built on top of them.
+    #[test]
+    fn test_parse_example() {
+        let json = r#"{
+  "tags": ["cs/theory/algorithms/compression"],
+  "confidence": [0.42],
+  "new_tags": [
+    {
+      "name": "information_theory",
+      "parent": "cs/theory",
+      "reason": "Resource is explicitly about learning information theory concepts (entropy, intuition, applications)."
+    }
+  ],
+  "reasoning": "The content is centered on information theory; the closest existing tag is compression under theory/algorithms, but a dedicated information theory tag would fit better."
+}"#;
+
+        let result = ClassificationResult::from_json(json).unwrap();
+
+        assert_eq!(result.tags.len(), 1);
+        assert_eq!(result.tags[0], "cs/theory/algorithms/compression");
+        assert_eq!(result.confidence[0], 0.42);
+        assert_eq!(result.new_tags.len(), 1);
+        assert_eq!(result.new_tags[0].name, "information_theory");
+
+        // Assert on the helpers instead of printing them, so regressions fail the test.
+        assert_eq!(
+            result.primary_tag(),
+            Some(("cs/theory/algorithms/compression", 0.42))
+        );
+        assert!(!result.is_confident(0.5));
+        assert!(result.is_confident(0.4));
+        assert!(result.confident_tags(0.5).is_empty());
+        assert_eq!(
+            result.confident_tags(0.4),
+            vec!["cs/theory/algorithms/compression"]
+        );
+    }
+}