Ci-dessous se trouve le HTML.
//div[@id='pay-bills']//div//h2[@class='head ng-binding'][2]
3 Réponses :
J'espère que cela peut aider.
>>> html = """ <div class="list-items tab-pane fade active show" ng-class="type=='pay'?'show active':''" tabindex="1" id="pay-bills" ... > ... ... <div ng-repeat="item in BillList.Result.categorizedBillNickBene" class="ng-scope"> ... <h2 class="head ng-binding" ng-show="filtered.length!=0">Electricity Bill Payment</h2> ... </div> ... ... <div ng-repeat="item in BillList.Result.categorizedBillNickBene" class="ng-scope"> ... <h2 class="head ng-binding" ng-show="filtered.length!=0">Gas Bill Payment</h2> ... </div> ... ... </div>""" >>> html ' <div class="list-items tab-pane fade active show" ng-class="type==\'pay\'?\'show active\':\'\'" tabindex="1" id="pay-bills">\n\n <div ng-repeat="item in BillList.Result.categorizedBillNickBene" class="ng-scope">\n <h2 class="head ng-binding" ng-show="filtered.length!=0">Electricity Bill Payment</h2>\n </div>\n\n <div ng-repeat="item in BillList.Result.categorizedBillNickBene" class="ng-scope">\n <h2 class="head ng-binding" ng-show="filtered.length!=0">Gas Bill Payment</h2>\n </div>\n\n</div>' >>> from pprint import pprint >>> pprint(html) (' <div class="list-items tab-pane fade active show" ' 'ng-class="type==\'pay\'?\'show active\':\'\'" tabindex="1" id="pay-bills">\n' '\n' ' <div ng-repeat="item in BillList.Result.categorizedBillNickBene" ' 'class="ng-scope">\n' ' <h2 class="head ng-binding" ng-show="filtered.length!=0">Electricity ' 'Bill Payment</h2>\n' ' </div>\n' '\n' ' <div ng-repeat="item in BillList.Result.categorizedBillNickBene" ' 'class="ng-scope">\n' ' <h2 class="head ng-binding" ng-show="filtered.length!=0">Gas Bill ' 'Payment</h2>\n' ' </div>\n' '\n' '</div>') >>> from scrapy.http import HtmlResponse >>> response = HtmlResponse(url='my html string', body=html, encoding='utf-8') >>> response.xpath('//div') [<Selector xpath='//div' data='<div class="list-items tab-pane fade ...'>, <Selector xpath='//div' data='<div ng-repeat="item in BillList.Resu...'>, <Selector xpath='//div' data='<div ng-repeat="item in 
BillList.Resu...'>] >>> response.xpath('//div').getall() ['<div class="list-items tab-pane fade active show" ng-class="type==\'pay\'?\'show active\':\'\'" tabindex="1" id="pay-bills">\n\n <div ng-repeat="item in BillList.Result.categorizedBillNickBene" class="ng-scope">\n <h2 class="head ng-binding" ng-show="filtered.length!=0">Electricity Bill Payment</h2>\n </div>\n\n <div ng-repeat="item in BillList.Result.categorizedBillNickBene" class="ng-scope">\n <h2 class="head ng-binding" ng-show="filtered.length!=0">Gas Bill Payment</h2>\n </div>\n\n</div>', '<div ng-repeat="item in BillList.Result.categorizedBillNickBene" class="ng-scope">\n <h2 class="head ng-binding" ng-show="filtered.length!=0">Electricity Bill Payment</h2>\n </div>', '<div ng-repeat="item in BillList.Result.categorizedBillNickBene" class="ng-scope">\n <h2 class="head ng-binding" ng-show="filtered.length!=0">Gas Bill Payment</h2>\n </div>'] >>> response.xpath('//div//h2').getall() ['<h2 class="head ng-binding" ng-show="filtered.length!=0">Electricity Bill Payment</h2>', '<h2 class="head ng-binding" ng-show="filtered.length!=0">Gas Bill Payment</h2>'] >>> response.xpath('//div//h2').getall()[1] '<h2 class="head ng-binding" ng-show="filtered.length!=0">Gas Bill Payment</h2>' >>> response.xpath('//div//h2/text()').getall()[1] 'Gas Bill Payment'
Vous voulez la deuxième div ; ce que vous avez essayé vous donnerait le deuxième h2 à l'intérieur de la div, s'il y en avait un.
Essayez
Pourquoi cela ne fonctionne pas :
(//div[@id='pay-bills']/div/h2)[2]/text()