|
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
|
|
module Extractor
|
|
|
|
|
MAX_DOMAIN_LENGTH = 253
|
|
|
|
|
|
|
|
|
|
extend Twitter::TwitterText::Extractor
|
|
|
|
|
|
|
|
|
|
module_function
|
|
|
|
|
|
|
|
|
|
# Collects URLs, hashtags, mentions/lists and extra URIs found in +text+
# into a single list of entities.
#
# @param text [String, nil] text to scan
# @param options [Hash] forwarded unchanged to +extract_urls_with_indices+
# @yield [entity] each surviving entity, after overlap removal
# @return [Array<Hash>] the entities returned by
#   +remove_overlapping_entities+, or +[]+ when nothing was extracted
def extract_entities_with_indices(text, options = {}, &block)
  # Hashtags are always extracted with check_url_overlap: false here; overlap
  # handling is left to remove_overlapping_entities below.
  # NOTE(review): extract_urls_with_indices and remove_overlapping_entities
  # are not defined in this file; presumably they come from the extended
  # Twitter::TwitterText::Extractor - confirm.
  found = [
    extract_urls_with_indices(text, options),
    extract_hashtags_with_indices(text, check_url_overlap: false),
    extract_mentions_or_lists_with_indices(text),
    extract_extra_uris_with_indices(text),
  ].reduce(:+)

  return [] if found.empty?

  remove_overlapping_entities(found).tap do |kept|
    kept.each(&block) if block
  end
end
|
|
|
|
|
|
|
|
|
|
# Scans +text+ for account mentions using Account::MENTION_RE.
#
# @param text [String, nil] text to scan
# @yield [screen_name, start_position, end_position] once per mention found,
#   if a block is given
# @return [Array<Hash>] one +{ screen_name:, indices: [start, end] }+ hash per
#   accepted match; +[]+ when +text+ is nil or contains no at-sign
def extract_mentions_or_lists_with_indices(text)
  return [] unless text && Twitter::TwitterText::Regex[:at_signs].match?(text)

  mentions = []

  text.scan(Account::MENTION_RE) do |screen_name, _|
    # Grab the MatchData first; the calls below must not be allowed to
    # replace it before the offsets are read.
    match = Regexp.last_match

    # Reject candidates whose trailing text matches :end_mention_match.
    next if Twitter::TwitterText::Regex[:end_mention_match].match?(match.post_match)

    # For "user@domain" the part after the first '@' is the domain; it is nil
    # for a bare username.
    domain = screen_name.split('@')[1]
    next if domain.present? && domain.length > MAX_DOMAIN_LENGTH

    mentions << {
      screen_name: screen_name,
      # Capture group 1 is the screen name; the range starts one character
      # earlier (presumably at the leading at-sign - confirm against MENTION_RE).
      indices: [match.char_begin(1) - 1, match.char_end(1)],
    }
  end

  mentions.each { |mention| yield mention[:screen_name], *mention[:indices] } if block_given?

  mentions
end
|
|
|
|
|
|
|
|
|
|
# Scans +text+ for hashtags using Tag::HASHTAG_RE.
#
# @param text [String, nil] text to scan
# @param _options [Hash] accepted for call compatibility; not used
# @yield [hashtag, start_position, end_position] once per hashtag found, if a
#   block is given
# @return [Array<Hash>] one +{ hashtag:, indices: [start, end] }+ hash per
#   match; +[]+ when +text+ is nil or contains no hash sign
def extract_hashtags_with_indices(text, _options = {})
  # Cheap pre-check so the full regex only runs on text that contains a hash
  # sign: either ASCII '#' or the fullwidth number sign U+FF03. The escape is
  # used instead of the literal character so the two cannot be conflated by
  # encoding normalisation.
  return [] unless text&.index(/[#\uFF03]/)

  possible_entries = []

  text.scan(Tag::HASHTAG_RE) do |hash_text, _|
    # Capture the scan's MatchData before the nested String#match below
    # replaces the last-match state.
    match_data = Regexp.last_match

    # Capture group 1 is the tag text; the range starts one character earlier
    # (presumably at the hash sign - confirm against Tag::HASHTAG_RE).
    start_position = match_data.char_begin(1) - 1
    end_position = match_data.char_end(1)

    # A tag immediately followed by "://" has swallowed a URL scheme
    # ("#foohttps://..."): strip the trailing http/https from the tag and
    # shrink the range accordingly. A tag that is nothing but the scheme is
    # left as-is because (.+) requires at least one preceding character.
    if match_data.post_match.start_with?('://')
      hash_text.match(/(.+)(https?\Z)/) do |matched|
        hash_text = matched[1]
        end_position -= matched[2].codepoint_length
      end
    end

    possible_entries << {
      hashtag: hash_text,
      indices: [start_position, end_position],
    }
  end

  possible_entries.each { |tag| yield tag[:hashtag], *tag[:indices] } if block_given?

  possible_entries
end
|
|
|
|
|
|
|
|
|
|
# Scans +text+ for URIs matched by the :valid_extended_uri pattern.
#
# @param text [String, nil] text to scan
# @yield [url, start_position, end_position] once per URI found, if a block
#   is given
# @return [Array<Hash>] one +{ url:, indices: [start, end] }+ hash per match;
#   +[]+ when +text+ is nil or contains no colon
def extract_extra_uris_with_indices(text)
  # Cheap pre-check: without a ':' the scan is skipped entirely.
  return [] unless text&.index(':')

  uris = []

  text.scan(Twitter::TwitterText::Regex[:valid_extended_uri]) do
    match = Regexp.last_match

    # Capture group 3 holds the URI; both the value and its character range
    # are taken from that group.
    uris << {
      url: match[3],
      indices: [match.char_begin(3), match.char_end(3)],
    }
  end

  uris.each { |uri| yield uri[:url], *uri[:indices] } if block_given?

  uris
end
|
|
|
|
|
end
|