mirror of
				https://gitlab.com/ceda_ei/firefox-web-apps
				synced 2025-11-04 11:10:06 +01:00 
			
		
		
		
	Handle edge cases with no title for metadata extraction
This commit is contained in:
		@@ -45,6 +45,8 @@ def extract_metadata(url):
 | 
			
		||||
    metadata = {}
 | 
			
		||||
 | 
			
		||||
    # Find the title
 | 
			
		||||
    titles = []
 | 
			
		||||
    if soup.title:
 | 
			
		||||
        titles = [soup.title.string]
 | 
			
		||||
    for tag in soup.find_all("meta"):
 | 
			
		||||
        title_props = ["title", "og:title", "twitter:title"]
 | 
			
		||||
@@ -53,11 +55,16 @@ def extract_metadata(url):
 | 
			
		||||
            titles.append(tag["content"])
 | 
			
		||||
    # Set title to the most common if it occurs more than once, else prefer
 | 
			
		||||
    # title tag
 | 
			
		||||
    most_common = Counter(titles).most_common(1)[0]
 | 
			
		||||
    if most_common[1] > 1:
 | 
			
		||||
        metadata["title"] = most_common[0].strip()
 | 
			
		||||
    most_common = Counter(titles).most_common(1)
 | 
			
		||||
    if not most_common:
 | 
			
		||||
        metadata["title"] = None
 | 
			
		||||
    elif most_common[0][1] > 1:
 | 
			
		||||
        metadata["title"] = most_common[0][0].strip()
 | 
			
		||||
    else:
 | 
			
		||||
        if soup.title:
 | 
			
		||||
            metadata["title"] = soup.title.string.strip()
 | 
			
		||||
        else:
 | 
			
		||||
            metadata["title"] = most_common[0][0].strip()
 | 
			
		||||
 | 
			
		||||
    # Find the image.
 | 
			
		||||
    # Try link first, followed by /favicon.{png,ico}, followed by og:, twitter:
 | 
			
		||||
@@ -121,6 +128,7 @@ def main():
 | 
			
		||||
        eprint(f"Maybe you meant https://{args.url} ?")
 | 
			
		||||
        sys.exit(1)
 | 
			
		||||
 | 
			
		||||
    print("Fetching details ...")
 | 
			
		||||
    metadata = extract_metadata(args.url)
 | 
			
		||||
    if not args.name:
 | 
			
		||||
        args.name = metadata["title"]
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user