Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions webmagic-core/src/main/java/us/codecraft/webmagic/Request.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
package us.codecraft.webmagic;

import java.util.ArrayList;
import java.util.List;
import us.codecraft.webmagic.downloader.Downloader;
import us.codecraft.webmagic.model.HttpRequestBody;
import us.codecraft.webmagic.pipeline.Pipeline;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.scheduler.Scheduler;
import us.codecraft.webmagic.utils.Experimental;

import java.io.Serializable;
Expand Down Expand Up @@ -53,6 +59,14 @@ public class Request implements Serializable {

private String charset;

/**
 * Optional downloader for this request; {@code null} until {@code setDownloader} is called.
 * Transient because it is a runtime collaborator, not request data, and must not be
 * written out when the request is serialized.
 * NOTE(review): assumes Downloader implementations are not Serializable - confirm.
 */
private transient Downloader downloader;

/**
 * Optional page processor for this request; {@code null} until {@code setPageProcessor}
 * is called. Transient for the same reason as {@code downloader}.
 */
private transient PageProcessor pageProcessor;

/**
 * Optional scheduler for this request; {@code null} until {@code setScheduler} is called.
 * Transient for the same reason as {@code downloader}.
 */
private transient Scheduler scheduler;

/**
 * Pipelines attached to this request; starts out empty.
 * Deliberately NOT transient: a transient field is restored as {@code null} on
 * deserialization (the initializer is not re-run), which would break callers that
 * invoke {@code getPipelines().isEmpty()}.
 * NOTE(review): non-serializable Pipeline elements will still fail Java serialization - confirm
 * whether requests carrying pipelines are ever serialized.
 */
private List<Pipeline> pipelines = new ArrayList<Pipeline>();

/**
 * Creates a request without performing any initialization in the constructor body.
 */
public Request() {
    // intentionally empty
}

Expand Down Expand Up @@ -188,6 +202,38 @@ public Request setCharset(String charset) {
return this;
}

/**
 * Returns the downloader stored on this request.
 *
 * @return the stored downloader, or {@code null} if none has been set
 */
public Downloader getDownloader() {
    return this.downloader;
}

/**
 * Stores the downloader to associate with this request.
 *
 * @param downloader the downloader to store; stored as given, including {@code null}
 * @return this request, so calls can be chained like the class's other setters
 */
public Request setDownloader(Downloader downloader) {
    this.downloader = downloader;
    return this;
}

/**
 * Returns the page processor stored on this request.
 *
 * @return the stored page processor, or {@code null} if none has been set
 */
public PageProcessor getPageProcessor() {
    return this.pageProcessor;
}

/**
 * Stores the page processor to associate with this request.
 *
 * @param pageProcessor the page processor to store; stored as given, including {@code null}
 * @return this request, so calls can be chained like the class's other setters
 */
public Request setPageProcessor(PageProcessor pageProcessor) {
    this.pageProcessor = pageProcessor;
    return this;
}

/**
 * Returns the scheduler stored on this request.
 *
 * @return the stored scheduler, or {@code null} if none has been set
 */
public Scheduler getScheduler() {
    return this.scheduler;
}

/**
 * Stores the scheduler to associate with this request.
 *
 * @param scheduler the scheduler to store; stored as given, including {@code null}
 * @return this request, so calls can be chained like the class's other setters
 */
public Request setScheduler(Scheduler scheduler) {
    this.scheduler = scheduler;
    return this;
}

/**
 * Returns the pipelines attached to this request.
 * The returned list is the request's own backing list, not a copy, so any
 * modification made through it changes this request.
 *
 * @return the backing list of pipelines
 */
public List<Pipeline> getPipelines() {
    return this.pipelines;
}

/**
 * Appends a single pipeline to this request's pipeline list.
 *
 * @param pipeline the pipeline to append
 * @return this request, so calls can be chained like the class's other mutators
 */
public Request addPipelines(Pipeline pipeline) {
    this.pipelines.add(pipeline);
    return this;
}

@Override
public String toString() {
return "Request{" +
Expand Down
24 changes: 20 additions & 4 deletions webmagic-core/src/main/java/us/codecraft/webmagic/Spider.java
Original file line number Diff line number Diff line change
Expand Up @@ -401,7 +401,11 @@ public void test(String... urls) {
}

private void processRequest(Request request) {
Page page = downloader.download(request, this);
Downloader dl = request.getDownloader();
if(null == dl){
dl = downloader;
}
Page page = dl.download(request, this);
if (page.isDownloadSuccess()){
onDownloadSuccess(request, page);
} else {
Expand All @@ -411,10 +415,18 @@ private void processRequest(Request request) {

private void onDownloadSuccess(Request request, Page page) {
if (site.getAcceptStatCode().contains(page.getStatusCode())){
pageProcessor.process(page);
PageProcessor pp = request.getPageProcessor();
if(null == pp){
pp = pageProcessor;
}
pp.process(page);
extractAndAddRequests(page, spawnUrl);
if (!page.getResultItems().isSkip()) {
for (Pipeline pipeline : pipelines) {
List<Pipeline> ps = request.getPipelines();
if(ps.isEmpty()){
ps.addAll(pipelines);
}
for (Pipeline pipeline : ps) {
pipeline.process(page.getResultItems(), this);
}
}
Expand Down Expand Up @@ -468,7 +480,11 @@ private void addRequest(Request request) {
if (site.getDomain() == null && request != null && request.getUrl() != null) {
site.setDomain(UrlUtils.getDomain(request.getUrl()));
}
scheduler.push(request, this);
Scheduler sc = request.getScheduler();
if(null == sc){
sc = scheduler;
}
sc.push(request, this);
}

protected void checkIfRunning() {
Expand Down