Fix crash in url preview when html tag has no text

Signed-off-by: Marcin Bachry <hegel666@gmail.com>
This commit is contained in:
Marcin Bachry 2016-12-14 22:38:18 +01:00
parent c3208e45c9
commit 24c16fc349
2 changed files with 54 additions and 1 deletions

View File

@ -381,7 +381,10 @@ def _calc_og(tree, media_uri):
if 'og:title' not in og: if 'og:title' not in og:
# do some basic spidering of the HTML # do some basic spidering of the HTML
title = tree.xpath("(//title)[1] | (//h1)[1] | (//h2)[1] | (//h3)[1]") title = tree.xpath("(//title)[1] | (//h1)[1] | (//h2)[1] | (//h3)[1]")
og['og:title'] = title[0].text.strip() if title else None if title and title[0].text is not None:
og['og:title'] = title[0].text.strip()
else:
og['og:title'] = None
if 'og:image' not in og: if 'og:image' not in og:
# TODO: extract a favicon failing all else # TODO: extract a favicon failing all else

View File

@ -215,3 +215,53 @@ class PreviewUrlTestCase(unittest.TestCase):
u"og:title": u"Foo", u"og:title": u"Foo",
u"og:description": u"Some text." u"og:description": u"Some text."
}) })
def test_missing_title(self):
    """A page with no title or heading elements yields ``og:title`` of None.

    The document contains only body text, so there is nothing to use as a
    title; the description is still expected to be the body text.
    """
    html = u"""
<html>
<body>
Some text.
</body>
</html>
"""
    og = decode_and_calc_og(html, "http://example.com/test.html")
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(og, {
        u"og:title": None,
        u"og:description": u"Some text."
    })
def test_h1_as_title(self):
    """An ``<h1>`` is used as ``og:title`` when the page has no ``<title>``.

    The description comes from the explicit ``og:description`` meta tag in
    the document.
    """
    html = u"""
<html>
<meta property="og:description" content="Some text."/>
<body>
<h1>Title</h1>
</body>
</html>
"""
    og = decode_and_calc_og(html, "http://example.com/test.html")
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(og, {
        u"og:title": u"Title",
        u"og:description": u"Some text."
    })
def test_missing_title_and_broken_h1(self):
    """An ``<h1>`` with no direct text yields ``og:title`` of None.

    Regression test: the heading contains only a child ``<a>`` element and
    no text of its own, which must produce a None title rather than a crash.
    """
    html = u"""
<html>
<body>
<h1><a href="foo"/></h1>
Some text.
</body>
</html>
"""
    og = decode_and_calc_og(html, "http://example.com/test.html")
    # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
    self.assertEqual(og, {
        u"og:title": None,
        u"og:description": u"Some text."
    })