将代码DownloadSomeData.java放在教程的Java Project中,代码如下:
package com.alibaba.alink; import org.apache.commons.io.FileUtils; import javax.net.ssl.HttpsURLConnection; import javax.net.ssl.SSLContext; import java.io.File; import java.net.URL; /** * 本代码只能下载部分章节的数据。 * 全部数据链接地址:https://www.yuque.com/pinshu/alink_tutorial/book_java_reference */ public class DownloadSomeData { public static void main(String[] args) throws Exception { downloadUrl( "http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data", Chap03.LOCAL_DIR ); downloadUrl( "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv", Chap03.LOCAL_DIR ); downloadUrl( "http://files.grouplens.org/datasets/movielens/ml-100k/u.data", Chap03.LOCAL_DIR ); downloadUrl( "http://archive.ics.uci.edu/ml/machine-learning-databases/00267/data_banknote_authentication.txt", Chap08.DATA_DIR ); downloadUrl( "http://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data", Chap09.DATA_DIR ); downloadUrl( "http://archive.ics.uci.edu/ml/machine-learning-databases/statlog/german/german.data", Chap10.DATA_DIR ); downloadUrl( "http://alink-release.oss-cn-beijing.aliyuncs.com/data-files/action_log.csv", Chap11.DATA_DIR ); downloadUrl( "http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data", Chap12.DATA_DIR ); downloadUrl( "http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz", Chap13.DATA_DIR ); downloadUrl( "http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz", Chap13.DATA_DIR ); downloadUrl( "http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz", Chap13.DATA_DIR ); downloadUrl( "http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz", Chap13.DATA_DIR ); downloadUrl( "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv", Chap16.DATA_DIR ); for (String fileName : new String[]{Chap24.ITEM_FILE, Chap24.USER_FILE, Chap24.RATING_FILE, Chap24.RATING_TRAIN_FILE, Chap24.RATING_TEST_FILE }) { downloadUrl( "http://files.grouplens.org/datasets/movielens/ml-100k/" + fileName, Chap24.DATA_DIR ); } downloadUrlHttps( "https://raw.githubusercontent.com/tennessine/corpus/master/%E4%B8%89%E5%9B%BD%E6%BC%94%E4%B9%89.txt", Chap22.DATA_DIR ); downloadUrlHttps( "https://github.com/BenDerPan/toutiao-text-classfication-dataset/raw/master/toutiao_cat_data.txt.zip", Chap21.DATA_DIR ); } static synchronized void downloadUrl(String url, String dirPath) { try { URL httpUrl = new URL(url); File dir = new File(dirPath); if (!dir.exists()) { dir.mkdirs(); } String fileName = url.substring(url.lastIndexOf("/") + 1); FileUtils.copyURLToFile(httpUrl, new File(dir, fileName)); // FileUtils.copyURLToFile(httpUrl, new File(dir, fileName), 30000, 10000); System.out.println("Success @ " + url); } catch (Exception e) { System.err.println("Failed @ " + url); System.err.println(e.toString()); } } static synchronized void downloadUrlHttps(String url, String dirPath) { try { URL httpsUrl = new URL(url); File dir = new File(dirPath); if (!dir.exists()) { dir.mkdirs(); } String str = java.net.URLDecoder.decode(url, "UTF-8"); String fileName = str.substring(str.lastIndexOf("/") + 1); SSLContext context = SSLContext.getInstance("TLS"); context.init(null, null, null); HttpsURLConnection.setDefaultSSLSocketFactory(context.getSocketFactory()); HttpsURLConnection connection = (HttpsURLConnection) httpsUrl.openConnection(); FileUtils.copyURLToFile(connection.getURL(), new File(dir, fileName), 60000, 50000); System.out.println("Success @ " + url); } catch (Exception e) { System.err.println("Failed @ " + url); System.err.println(e.toString()); } } }
在Java Project中的位置及运行结果,如下面截图所示: