|
|
|
|
@ -46,22 +46,13 @@ RSpec.describe LinkDetailsExtractor do
|
|
|
|
|
</html> |
|
|
|
|
HTML |
|
|
|
|
|
|
|
|
|
describe '#title' do |
|
|
|
|
it 'returns the title from title tag' do |
|
|
|
|
expect(subject.title).to eq 'Man bites dog' |
|
|
|
|
end |
|
|
|
|
end |
|
|
|
|
|
|
|
|
|
describe '#description' do |
|
|
|
|
it 'returns the description from meta tag' do |
|
|
|
|
expect(subject.description).to eq "A dog's tale" |
|
|
|
|
end |
|
|
|
|
end |
|
|
|
|
|
|
|
|
|
describe '#language' do |
|
|
|
|
it 'returns the language from lang attribute' do |
|
|
|
|
expect(subject.language).to eq 'en' |
|
|
|
|
end |
|
|
|
|
it 'extracts the expected values from html metadata' do |
|
|
|
|
expect(subject) |
|
|
|
|
.to have_attributes( |
|
|
|
|
title: eq('Man bites dog'), |
|
|
|
|
description: eq("A dog's tale"), |
|
|
|
|
language: eq('en') |
|
|
|
|
) |
|
|
|
|
end |
|
|
|
|
end |
|
|
|
|
|
|
|
|
|
@ -90,40 +81,16 @@ RSpec.describe LinkDetailsExtractor do
|
|
|
|
|
end |
|
|
|
|
|
|
|
|
|
shared_examples 'structured data' do |
|
|
|
|
describe '#title' do |
|
|
|
|
it 'returns the title from structured data' do |
|
|
|
|
expect(subject.title).to eq 'Man bites dog' |
|
|
|
|
end |
|
|
|
|
end |
|
|
|
|
|
|
|
|
|
describe '#description' do |
|
|
|
|
it 'returns the description from structured data' do |
|
|
|
|
expect(subject.description).to eq "A dog's tale" |
|
|
|
|
end |
|
|
|
|
end |
|
|
|
|
|
|
|
|
|
describe '#published_at' do |
|
|
|
|
it 'returns the publicaton time from structured data' do |
|
|
|
|
expect(subject.published_at).to eq '2022-01-31T19:53:00+00:00' |
|
|
|
|
end |
|
|
|
|
end |
|
|
|
|
|
|
|
|
|
describe '#author_name' do |
|
|
|
|
it 'returns the author name from structured data' do |
|
|
|
|
expect(subject.author_name).to eq 'Charlie Brown' |
|
|
|
|
end |
|
|
|
|
end |
|
|
|
|
|
|
|
|
|
describe '#provider_name' do |
|
|
|
|
it 'returns the provider name from structured data' do |
|
|
|
|
expect(subject.provider_name).to eq 'Pet News' |
|
|
|
|
end |
|
|
|
|
end |
|
|
|
|
|
|
|
|
|
describe '#language' do |
|
|
|
|
it 'returns the language from structured data' do |
|
|
|
|
expect(subject.language).to eq 'en' |
|
|
|
|
end |
|
|
|
|
it 'extracts the expected values from structured data' do |
|
|
|
|
expect(subject) |
|
|
|
|
.to have_attributes( |
|
|
|
|
title: eq('Man bites dog'), |
|
|
|
|
description: eq("A dog's tale"), |
|
|
|
|
published_at: eq('2022-01-31T19:53:00+00:00'), |
|
|
|
|
author_name: eq('Charlie Brown'), |
|
|
|
|
provider_name: eq('Pet News'), |
|
|
|
|
language: eq('en') |
|
|
|
|
) |
|
|
|
|
end |
|
|
|
|
end |
|
|
|
|
|
|
|
|
|
@ -245,58 +212,19 @@ RSpec.describe LinkDetailsExtractor do
|
|
|
|
|
</html> |
|
|
|
|
HTML |
|
|
|
|
|
|
|
|
|
describe '#canonical_url' do |
|
|
|
|
it 'returns the URL from Open Graph protocol data' do |
|
|
|
|
expect(subject.canonical_url).to eq 'https://example.com/dog.html' |
|
|
|
|
end |
|
|
|
|
end |
|
|
|
|
|
|
|
|
|
describe '#title' do |
|
|
|
|
it 'returns the title from Open Graph protocol data' do |
|
|
|
|
expect(subject.title).to eq 'Man bites dog' |
|
|
|
|
end |
|
|
|
|
end |
|
|
|
|
|
|
|
|
|
describe '#description' do |
|
|
|
|
it 'returns the description from Open Graph protocol data' do |
|
|
|
|
expect(subject.description).to eq "A dog's tale" |
|
|
|
|
end |
|
|
|
|
end |
|
|
|
|
|
|
|
|
|
describe '#published_at' do |
|
|
|
|
it 'returns the publicaton time from Open Graph protocol data' do |
|
|
|
|
expect(subject.published_at).to eq '2022-01-31T19:53:00+00:00' |
|
|
|
|
end |
|
|
|
|
end |
|
|
|
|
|
|
|
|
|
describe '#author_name' do |
|
|
|
|
it 'returns the author name from Open Graph protocol data' do |
|
|
|
|
expect(subject.author_name).to eq 'Charlie Brown' |
|
|
|
|
end |
|
|
|
|
end |
|
|
|
|
|
|
|
|
|
describe '#language' do |
|
|
|
|
it 'returns the language from Open Graph protocol data' do |
|
|
|
|
expect(subject.language).to eq 'en' |
|
|
|
|
end |
|
|
|
|
end |
|
|
|
|
|
|
|
|
|
describe '#image' do |
|
|
|
|
it 'returns the image from Open Graph protocol data' do |
|
|
|
|
expect(subject.image).to eq 'https://example.com/snoopy.jpg' |
|
|
|
|
end |
|
|
|
|
end |
|
|
|
|
|
|
|
|
|
describe '#image:alt' do |
|
|
|
|
it 'returns the image description from Open Graph protocol data' do |
|
|
|
|
expect(subject.image_alt).to eq 'A good boy' |
|
|
|
|
end |
|
|
|
|
end |
|
|
|
|
|
|
|
|
|
describe '#provider_name' do |
|
|
|
|
it 'returns the provider name from Open Graph protocol data' do |
|
|
|
|
expect(subject.provider_name).to eq 'Pet News' |
|
|
|
|
end |
|
|
|
|
it 'extracts the expected values from open graph data' do |
|
|
|
|
expect(subject) |
|
|
|
|
.to have_attributes( |
|
|
|
|
canonical_url: eq('https://example.com/dog.html'), |
|
|
|
|
title: eq('Man bites dog'), |
|
|
|
|
description: eq("A dog's tale"), |
|
|
|
|
published_at: eq('2022-01-31T19:53:00+00:00'), |
|
|
|
|
author_name: eq('Charlie Brown'), |
|
|
|
|
language: eq('en'), |
|
|
|
|
image: eq('https://example.com/snoopy.jpg'), |
|
|
|
|
image_alt: eq('A good boy'), |
|
|
|
|
provider_name: eq('Pet News') |
|
|
|
|
) |
|
|
|
|
end |
|
|
|
|
end |
|
|
|
|
end |
|
|
|
|
|