Jump to content

Search the Community

Showing results for tags 'xpath python python3 lxml'.

  • Search By Tags

    Type tags separated by commas.
  • Search By Author

Content Type


Forums

  • Development
    • HTML Coding
    • JavaScript
    • PHP
    • CMS
    • Database
    • Web Server
    • Web-site Development
    • Internet Marketing, SEO
  • Library
    • Tricks and solutions
    • Books
  • Commercial services
    • Freelance
    • Job
    • Goods and Services
  • Our Forum
    • Flame
    • Contests
    • Feedback and Ideas

Find results in...

Find results that contain...


Date Created

  • Start

    End


Last Updated

  • Start

    End


Filter by number of...

Joined

  • Start

    End


Group


Web site


Telegram


Signal


Viber


Skype


From


Interests

Found 1 result

  # 1. Forum post (translated from Russian):
  #
  #    Please help me fix the XPath search expression. The
  #    parse_catalog_goods() function of the following script looks for <a>
  #    elements at the first nesting level (that is, it collects information
  #    about all goods in the catalog). If the catalog later contains <a>
  #    elements at deeper nesting levels, parse_catalog_goods() will no longer
  #    be able to collect all the information about the catalog's goods.
  #
  #    The script below is the posted code with its line breaks restored and
  #    the XPath expression in parse_catalog_goods() generalized.
import pprint  # only referenced by the debugging output that the post had commented out

import requests
import lxml.etree
import lxml.html


def get_catalog(url, timeout=10):
    """Download *url* and return the catalog's root ``<ul>`` element.

    The catalog is the first ``<ul>`` that is a direct child of
    ``<section class="catalog">``.

    Args:
        url: Address of the page that contains the catalog.
        timeout: Seconds to wait for the server before giving up; without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        The ``<ul>`` element, or ``None`` (after printing a message) when the
        page cannot be fetched or holds no catalog section.
    """
    try:
        req = requests.get(url, timeout=timeout)
    except requests.RequestException:
        print('Error open. ')
        return None

    doc = lxml.html.document_fromstring(req.text)
    listCatalog = doc.xpath('//section[@class="catalog"]/ul')
    if not listCatalog:
        print('Error find catalog')
        return None
    return listCatalog[0]


def _links_by_title(anchors):
    """Map the stripped first text node of each ``<a>`` to its ``href``.

    Anchors that lack an ``href`` attribute or have no direct text node are
    skipped instead of raising ``IndexError``.
    """
    links = {}
    for anchor in anchors:
        href = anchor.get('href')
        texts = anchor.xpath('text()')
        if href is None or not texts:
            continue
        links[texts[0].strip()] = href
    return links


def parse_catalog_categories(catalogMenuList):
    """Collect the top-level category links of the catalog.

    Args:
        catalogMenuList: The catalog ``<ul>`` element from ``get_catalog``.

    Returns:
        A ``{title: href}`` dict built from the ``li/a`` anchors, or ``None``
        (after printing a message) when nothing was found.
    """
    dictCategories = _links_by_title(catalogMenuList.xpath('li/a'))
    if not dictCategories:
        print('Error. dictonary is empty')
        return None
    return dictCategories


def parse_catalog_goods(catalogMenuList):
    """Collect the links of the goods, whatever their nesting depth.

    The original expression ``li/ul/li/a`` matched exactly one nesting level.
    ``li//li[not(.//ul)]/a`` instead selects every ``<li>`` nested anywhere
    below a top-level item that contains no further ``<ul>``, i.e. the leaf
    entries.  Category and sub-category items are excluded because they own a
    nested list; top-level items are excluded because the match must be a
    descendant ``<li>``.
    NOTE(review): this assumes goods are always leaf ``<li>`` items and that
    every (sub)category has a nested ``<ul>`` -- confirm against the real
    markup of the site.

    Args:
        catalogMenuList: The catalog ``<ul>`` element from ``get_catalog``.

    Returns:
        A ``{title: href}`` dict, or ``None`` (after printing a message) when
        nothing was found.
    """
    dictGoods = _links_by_title(catalogMenuList.xpath('li//li[not(.//ul)]/a'))
    if not dictGoods:
        print('Error. dictonary is empty')
        return None
    return dictGoods


def put_xml(dictCategories, parentTag, childTag):
    """Serialize the keys of *dictCategories* as a small XML document.

    Only the names (keys) are written, one *childTag* element per name under
    a single *parentTag* root; the link values are not part of the output.

    Args:
        dictCategories: Mapping of names to links.
        parentTag: Tag name of the root element.
        childTag: Tag name used for every entry.

    Returns:
        The pretty-printed XML as ``str``, or ``None`` (after printing a
        message) if serialization produced nothing.
    """
    parent = lxml.etree.Element(parentTag)
    for name in dictCategories:
        lxml.etree.SubElement(parent, childTag).text = name

    # tostring(..., encoding='unicode') is the documented replacement for the
    # deprecated lxml.etree.tounicode().
    xml = lxml.etree.tostring(parent, encoding='unicode', pretty_print=True)
    if not xml:
        print('Error xml tree build')
        return None
    return xml


def record_xml(xml, fileName):
    """Write the *xml* string to *fileName*.

    The text carries no XML declaration, so readers will assume UTF-8; the
    file is therefore written as UTF-8 rather than in the platform default
    encoding.  I/O failures are reported on stdout instead of being raised.

    Args:
        xml: XML text to store.
        fileName: Path of the file to create or overwrite.
    """
    try:
        with open(fileName, "w", encoding="utf-8") as file:
            file.write(xml)
    except OSError as err:
        # Print the caught error itself, not the Exception class.
        print('Error record', err)


def main():
    """Fetch the catalog, extract the goods and store them in goods.xml.

    Each step reports its own failure and returns ``None``, so the pipeline
    stops at the first failed step instead of passing ``None`` onward.  The
    category export (parse_catalog_categories -> put_xml -> categories.xml)
    was disabled in the posted script and is not run here either.
    """
    url = 'http://first-store.ru/'

    # Compare with None explicitly: the truth value of an lxml element
    # depends on whether it has children.
    listCatalog = get_catalog(url)
    if listCatalog is None:
        return

    dictGoods = parse_catalog_goods(listCatalog)
    if dictGoods is None:
        return

    xmlGoods = put_xml(dictGoods, 'goods', 'item')
    if xmlGoods is None:
        return

    print(xmlGoods)
    record_xml(xmlGoods, 'goods.xml')


if __name__ == "__main__":
    main()

# Closing note of the post (translated from Russian):
#    Sorry for the long post.
#    P.S. Only the goods themselves must be selected, not the names of the
#    goods categories (which are also wrapped in <a>).
×
×
  • Create New...

Important Information

We have placed cookies on your device to help make this website better. You can adjust your cookie settings, otherwise we'll assume you're okay to continue. See more about our Guidelines and Privacy Policy