# frozen_string_literal: true
module Extractor
MAX_DOMAIN_LENGTH = 253
extend Twitter::TwitterText::Extractor
module_function
# Collect URL, hashtag, mention/list, and extra-URI entities from +text+,
# drop overlapping matches, and yield each surviving entity to the block
# when one is given. Returns the (possibly empty) array of entity hashes.
def extract_entities_with_indices(text, options = {}, &block)
  entities = []
  entities.concat(extract_urls_with_indices(text, options))
  entities.concat(extract_hashtags_with_indices(text, check_url_overlap: false))
  entities.concat(extract_mentions_or_lists_with_indices(text))
  entities.concat(extract_extra_uris_with_indices(text))

  return [] if entities.empty?

  entities = remove_overlapping_entities(entities)
  entities.each(&block) if block
  entities
end
# Find @mention / @user@domain entities in +text+.
#
# Returns an array of { screen_name:, indices: } hashes. A match is skipped
# when it is immediately followed by twitter-text's end-mention pattern, or
# when its domain part exceeds MAX_DOMAIN_LENGTH. Yields
# (screen_name, start, end) for each entity when a block is given.
#
# NOTE: char_begin/char_end are twitter-text extensions on MatchData that
# report codepoint (not byte) offsets.
def extract_mentions_or_lists_with_indices(text)
  return [] unless text && Twitter::TwitterText::Regex[:at_signs].match?(text)

  entries = []

  text.scan(Account::MENTION_RE) do |screen_name, _|
    match = Regexp.last_match
    trailing = match.post_match

    # Ignore matches that look like the tail of an e-mail address or URL.
    next if Twitter::TwitterText::Regex[:end_mention_match].match?(trailing)

    _, domain = screen_name.split('@')
    next if domain.present? && domain.length > MAX_DOMAIN_LENGTH

    entries << {
      screen_name: screen_name,
      indices: [match.char_begin(1) - 1, match.char_end(1)],
    }
  end

  if block_given?
    entries.each do |mention|
      yield mention[:screen_name], mention[:indices].first, mention[:indices].last
    end
  end

  entries
end
# Find #hashtag entities in +text+.
#
# Returns an array of { hashtag:, indices: } hashes. When a hashtag match
# has swallowed the scheme of a directly following URL (post_match starts
# with "://"), the trailing "http"/"https" is trimmed from the hashtag and
# the end index is shrunk accordingly. Yields (hashtag, start, end) for
# each entity when a block is given.
#
# NOTE(review): the guard regex previously read /[##]/ (two ASCII hashes),
# presumably a mojibake of the full-width sign; restored to /[#＃]/ so both
# ASCII and full-width hash marks are detected — confirm against upstream.
def extract_hashtags_with_indices(text, _options = {})
  return [] unless text&.index(/[#＃]/)

  possible_entries = []

  text.scan(Tag::HASHTAG_RE) do |hash_text, _|
    match_data = Regexp.last_match
    start_position = match_data.char_begin(1) - 1
    end_position = match_data.char_end(1)
    after = match_data.post_match

    if after.start_with?('://')
      # "#taghttp://example.com" — peel the URL scheme off the hashtag.
      hash_text.match(/(.+)(https?\Z)/) do |matched|
        hash_text = matched[1]
        end_position -= matched[2].codepoint_length
      end
    end

    possible_entries << {
      hashtag: hash_text,
      indices: [start_position, end_position],
    }
  end

  if block_given?
    possible_entries.each do |tag|
      yield tag[:hashtag], tag[:indices].first, tag[:indices].last
    end
  end

  possible_entries
end
# Find additional URI entities using twitter-text's :valid_extended_uri
# pattern (URIs with schemes the plain URL extractor does not cover).
#
# Returns an array of { url:, indices: } hashes; yields (url, start, end)
# for each entity when a block is given. Capture group 3 of the pattern
# holds the URI itself.
def extract_extra_uris_with_indices(text)
  return [] unless text&.index(':')

  entries = []

  text.scan(Twitter::TwitterText::Regex[:valid_extended_uri]) do
    match = Regexp.last_match
    entries << {
      url: match[3],
      indices: [match.char_begin(3), match.char_end(3)],
    }
  end

  if block_given?
    entries.each do |entry|
      yield entry[:url], entry[:indices].first, entry[:indices].last
    end
  end

  entries
end
end