library(tidyverse)

#' List a repository's `sh.tangled.repo` records from its PDS
#'
#' Issues a single `com.atproto.repo.listRecords` XRPC request against `host`
#' and parses the JSON response body. Only one request is made; no pagination
#' is performed, so records beyond the first page are not retrieved.
#'
#' @param host Base URL of the PDS; only the first element is used.
#' @param did Repository identifier sent as the `repo` query parameter.
#' @param limit Value sent as the `limit` query parameter (default 100).
#' @return The parsed JSON response, as produced by `jsonlite::fromJSON()`.
get_tangled_repo <- function(host, did, limit = 100) {
  httr::GET(
    url = sprintf("%s/xrpc/com.atproto.repo.listRecords", host[1]),
    query = list(
      repo = did,
      collection = "sh.tangled.repo",
      limit = limit
    )
  ) |>
    # Explicit encoding: do not rely on httr guessing it from the headers.
    httr::content(as = "text", encoding = "UTF-8") |>
    jsonlite::fromJSON()
}

# Load events ----

# `tangled.json` is read as a stream of JSON documents.
msgs <- jsonlite::stream_in(file("tangled.json"))

# DIDs that authored at least one event.
actors <- unique(msgs$did)

# Subjects of follow records.
followed <- msgs$commit |>
  filter(collection == "sh.tangled.graph.follow") |>
  pull(record) |>
  filter(!is.na(subject)) |>
  pull(subject) |>
  unique()

# Subjects of star records, with the leading `at://` and everything from the
# first `/sh` onwards stripped off.
starred <- msgs$commit |>
  filter(collection == "sh.tangled.feed.star") |>
  pull(record) |>
  filter(!is.na(subject)) |>
  pull(subject) |>
  gsub("^at://|/sh.*", "", x = _) |>
  unique()

# Resolve identities and list repos ----

xdf <- c(actors, followed, starred) |>
  unique() |>
  sprintf("https://plc.directory/%s", x = _) |>
  map(httr::GET, .progress = TRUE) |>
  map(\(resp) httr::content(resp, as = "text", encoding = "UTF-8")) |>
  map(jsonlite::fromJSON) |>
  map(\(doc) {
    # Fields missing from a response are NULL and are dropped by `tibble()`;
    # `list_rbind()` then fills them with NA.
    tibble(
      did = doc$id[1],
      handle = doc$alsoKnownAs[1],
      pds = doc$service$serviceEndpoint[1]
    )
  }) |>
  list_rbind() |>
  # Without an endpoint there is nothing to query; an NA host would otherwise
  # be formatted into the request URL and abort the run.
  filter(!is.na(pds)) |>
  mutate(
    # One failing host must not abort the whole crawl: errors are reported
    # (quiet = FALSE) and that row's result becomes NULL.
    repos = map2(
      pds,
      did,
      possibly(get_tangled_repo, otherwise = NULL, quiet = FALSE),
      .progress = TRUE
    ),
    records = map(repos, "records")
  ) |>
  select(-repos, -did) |>
  # NOTE(review): presumably a successful listing parses to a data frame
  # (whose length is its column count) while empty or failed listings have
  # length 0 -- confirm against real responses.
  filter(lengths(records) > 1) |>
  unnest(records) |>
  unnest(value) |>
  janitor::clean_names()

# Reports ----

# Distinct handles that have at least one listed repo.
xdf |>
  pull(handle) |>
  unique()

# Number of distinct PDS endpoints per apex domain.
xdf |>
  pull(pds) |>
  unique() |>
  curlparse::domain() |>
  psl::apex_domain() |>
  table(dnn = "domain") |>
  sort() |>
  as.data.frame.table(responseName = "ct")

# Number of repo records per knot.
xdf |>
  pull(knot) |>
  table(dnn = "knot") |>
  sort() |>
  as.data.frame.table(responseName = "ct")

# Markdown bullet list of repos: "- (`@handle`) name: description".
xdf |>
  distinct(handle, name, description) |>
  mutate(
    handle = sub("^at://", "@", handle),
    md = sprintf(
      "- (`%s`) %s%s%s",
      handle,
      name,
      ifelse(is.na(description), "", ": "),
      ifelse(is.na(description), "", description)
    )
  ) |>
  arrange(md) |>
  pull(md) |>
  writeLines()