Skip to content

fetch_posts.rb

Example: sync all posts from a given account (excluding replies and reposts) to a local JSON file. On subsequent runs, it fetches only the posts published since the previous run and adds them at the beginning of the file.

We'll use the getAuthorFeed endpoint on the AppView here, which does not require authentication.

rb
require 'json'
require 'time'

require 'didkit'
require 'minisky'

# Syncs the original posts of one account (no replies, no reposts) into a local
# JSON file, newest first. On later runs only posts newer than the newest saved
# one are fetched, and they are prepended to the existing list.
#
# Usage: <script> <handle | did> <posts.json>
# Exits with status 1 (message on stderr) on bad arguments, an unresolvable
# handle, or an unreadable posts file.

abort "Usage: #{$PROGRAM_NAME} <handle | did> <posts.json>" if ARGV.length != 2

account, posts_file = ARGV

# the resolver result is stringified, so "nothing resolved" shows up as an empty string
did = DID.resolve_handle(account).to_s
abort "Couldn't resolve handle: #{account}" if did.empty?

# create a client instance
bsky = Minisky.new('public.api.bsky.app', nil)

# print progress dots when loading multiple pages
bsky.default_progress = '.'

# load previously saved posts (newest first); we'll only fetch posts newer than the newest saved one
posts =
  if File.exist?(posts_file)
    begin
      JSON.parse(File.read(posts_file))
    rescue JSON::ParserError => e
      # fail with a readable message instead of a stack trace; the file is left untouched
      abort "Couldn't parse #{posts_file}: #{e.message}"
    end
  else
    []
  end

abort "#{posts_file} doesn't contain a JSON array of posts" unless posts.is_a?(Array)

latest_date = posts.first && posts.first['indexedAt']

unless posts.empty?
  summary = "Loaded #{posts.length} previously saved posts"
  # the newest saved entry may lack a timestamp (e.g. hand-edited file) - don't call Time.parse(nil)
  summary += " (last date: #{Time.parse(latest_date).getlocal})" if latest_date
  puts summary
end

# Time at which an entry entered the feed. For a repost this is the time of the
# repost itself (reason.indexedAt), not the indexedAt of the reposted post, which
# can be much older than anything we have saved.
feed_time = ->(item) { item.dig('reason', 'indexedAt') || item['post']['indexedAt'] }

# fetch all posts from the profile feed (without replies) until the target timestamp;
# comparing against the feed time keeps a recent repost of an old post from ending
# the paging before all new posts have been loaded
# NOTE(review): relies on fetch_all stopping once break_when matches an entry - confirm against Minisky docs
results = bsky.fetch_all('app.bsky.feed.getAuthorFeed',
  { actor: did, filter: 'posts_no_replies', limit: 100 },
  field: 'feed',
  break_when: latest_date && proc { |x| feed_time.call(x) <= latest_date }
)

new_posts = results
  # skip reposts: those entries carry a 'reason', also when the account reposts its own post
  .reject { |item| item['reason'] }
  .map { |item| item['post'] }
  # keep only posts written by this account
  .select { |post| post['author']['did'] == did }
  # trim some data to save space
  .map { |post| post.slice('uri', 'cid', 'record', 'indexedAt') }

posts = new_posts + posts

puts
puts "Fetched #{new_posts.length} new posts (total = #{posts.length})"

# save all new and old posts back to the file
File.write(posts_file, JSON.pretty_generate(posts))