[ESP-ENG] Títulos y enlaces en endjin || Titles and links in endjin

0 comments

pynomiems3 K3 years agoPeakD


Imagen diseñada con canva || Image designed with canva

import httpx
from selectolax.parser import HTMLParser

# Request headers sent with every page fetch: a desktop browser user-agent
# string.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36 Edg/87.0.664.75',
}

# endjin "what we think" edition pages to fetch, one URL per topic.
url_list = [
    'https://endjin.com/what-we-think/editions/analytics.html',
    'https://endjin.com/what-we-think/editions/automation.html',
    'https://endjin.com/what-we-think/editions/big-compute.html',
    'https://endjin.com/what-we-think/editions/open-source.html',
    'https://endjin.com/what-we-think/editions/internet-of-things.html',
]

# Fetch every page listed in ``url_list`` and print the text and ``href`` of
# each ``a.card-link-wrap`` anchor found on it.
#
# A single client is shared by all requests; the ``with`` block closes it
# once the loop is done instead of leaving one open client per URL.
with httpx.Client(headers=headers, follow_redirects=True) as client:
    # The loop variable has its own name so ``url_list`` stays bound to the
    # list rather than being rebound to the URL string being processed.
    for url in url_list:
        page_html = client.get(url).text

        # Keep a local snapshot of the page. The same file name is reused on
        # every iteration, so it always holds the most recently fetched page.
        # The ``with`` statement closes the file; no explicit close() needed.
        with open('endjin.html', mode='w', encoding="utf-8") as archive:
            archive.write(page_html)

        # Parse the text already in memory; reopening the snapshot just to
        # read it back would only add a file handle that has to be closed.
        local_html = HTMLParser(page_html)

        for anchor in local_html.css('a.card-link-wrap'):
            headlines = anchor.text(strip=True)
            # .get() yields None for an anchor without an href instead of
            # raising KeyError and aborting the remaining pages.
            links = anchor.attributes.get('href')

            print(f'headlines:{headlines} links:{links}')

Comments

Sort by Best