Hadoop快速入门——第二章、分布式集群(第四节、搭建开发环境)

网友投稿 607 2022-05-29

Hadoop快速入门——第二章、分布式集群

引包:

<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.7.3</version>
</dependency>

可以先安装一下【Big Data Tools】

安装完成后需要重新启动一下。

个人建议,先改一下【镜像】位置为国内的,我就没改,直接update了,玩了好几把【连连看】都没下载完毕。

创建测试类:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

/**
 * Smoke test for the development environment: build a Hadoop {@link Configuration},
 * obtain the {@link FileSystem} it describes, and print both to confirm the
 * client libraries are wired up correctly.
 */
public class Action {

    Configuration conf = null;
    FileSystem fs = null;

    public static void main(String[] args) {
        Action action = new Action();
        action.init();
        System.out.println(action.conf);
        System.out.println(action.fs);
    }

    /** Loads the default configuration and opens the FileSystem it points at. */
    public void init() {
        conf = new Configuration();
        try {
            fs = FileSystem.get(conf);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

输出:

Configuration: core-default.xml, core-site.xml, mapred-default.xml, mapred-site.xml, yarn-default.xml, yarn-site.xml, hdfs-default.xml, hdfs-site.xml

Hadoop快速入门——第二章、分布式集群(第四节、搭建开发环境)

org.apache.hadoop.fs.LocalFileSystem@43195e57

文件操作:

mkdirs:创建文件夹

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Demonstrates {@link FileSystem#mkdirs(Path)}: creates the directory tree
 * {@code /data/infos/} and reports whether creation succeeded.
 */
public class Action {

    Configuration conf = null;
    FileSystem fs = null;

    public static void main(String[] args) {
        Action action = new Action();
        action.init();
        try {
            Path target = new Path("/data/infos/");
            boolean created = action.fs.mkdirs(target);
            System.out.println(created ? "创建成功" : "创建失败");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Loads the default configuration and opens the FileSystem it points at. */
    public void init() {
        conf = new Configuration();
        try {
            fs = FileSystem.get(conf);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

会创建在【C盘的根目录】

copyFromLocalFile:复制文件到服务器(本地模拟)

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Demonstrates {@link FileSystem#copyFromLocalFile(Path, Path)}: uploads a
 * local file ({@code D:/info.txt}) into the filesystem path {@code /data/infos}.
 */
public class Action {

    Configuration conf = null;
    FileSystem fs = null;

    public static void main(String[] args) {
        Action action = new Action();
        action.init();
        try {
            Path localSource = new Path("D:/info.txt");
            Path remoteTarget = new Path("/data/infos");
            action.fs.copyFromLocalFile(localSource, remoteTarget);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Loads the default configuration and opens the FileSystem it points at. */
    public void init() {
        conf = new Configuration();
        try {
            fs = FileSystem.get(conf);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

本地效果:

修改文件名称【rename】:

import java.io.IOException;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Demonstrates {@link FileSystem#rename(Path, Path)}: renames
 * {@code /data/infos/info.txt} to a date-stamped name like {@code 2022_04_19.txt}.
 */
public class Action {

    Configuration conf = null;
    FileSystem fs = null;

    public static void main(String[] args) {
        Action action = new Action();
        action.init();
        try {
            // java.time replaces the legacy SimpleDateFormat/Date pair:
            // DateTimeFormatter is immutable and thread-safe, SimpleDateFormat is not.
            DateTimeFormatter format = DateTimeFormatter.ofPattern("yyyy_MM_dd");
            String today = LocalDate.now().format(format);
            boolean isf = action.fs.rename(
                    new Path("/data/infos/info.txt"),
                    new Path("/data/infos/" + today + ".txt"));
            System.out.println(isf ? "修改成功" : "修改失败");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Loads the default configuration and opens the FileSystem it points at. */
    public void init() {
        conf = new Configuration();
        try {
            fs = FileSystem.get(conf);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

本地效果:

删除文件deleteOnExit:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

/**
 * Demonstrates {@link FileSystem#deleteOnExit(Path)}.
 *
 * <p>Note: {@code deleteOnExit} does NOT delete the file immediately — it only
 * registers the path for deletion when the FileSystem is closed / the JVM exits,
 * and returns {@code true} if the registration succeeded. For an immediate
 * delete use {@code fs.delete(path, recursive)} instead.
 */
public class Action {

    Configuration conf = null;
    FileSystem fs = null;

    public static void main(String[] args) {
        Action action = new Action();
        action.init();
        try {
            boolean isf = action.fs.deleteOnExit(new Path("/data/infos/2022_04_19.txt"));
            // The original printed "刪除成功" here, which is misleading: true only
            // means the path was scheduled for deletion at JVM shutdown.
            System.out.println(isf ? "已登记退出时刪除" : "登记刪除失败");
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Loads the default configuration and opens the FileSystem it points at. */
    public void init() {
        conf = new Configuration();
        try {
            fs = FileSystem.get(conf);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

查看目录信息:

做一些测试文件:

遍历【/data/下的所有文件】

import java.io.IOException; import java.text.SimpleDateFormat; import java.util.Date; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.RemoteIterator; import org.apache.hadoop.fs.permission.FsPermission; public class Action { public static void main(String[] args) { Action action = new Action(); action.init(); try { //查看根目录信息 RemoteIterator listFiles = action.fs.listFiles(new Path("/data/infos/"),true); while (listFiles.hasNext()){ LocatedFileStatus next = listFiles.next(); Path path = next.getPath(); long blockSize = next.getBlockSize(); FsPermission permission = next.getPermission(); long len = next.getLen(); System.out.println("Linux路径:"+path); System.out.println("磁盘大小"+blockSize); System.out.println("权限"+permission); System.out.println("大小"+len+"KB"); System.out.println("-------------------"); } } catch (IOException e) { e.printStackTrace(); } } Configuration conf = null; FileSystem fs = null; public void init() { conf = new Configuration(); try { fs = FileSystem.get(conf); } catch (Exception e) { e.printStackTrace(); } } }

遍历文件以及文件夹listStatus:

编码:

import java.io.IOException; import java.text.SimpleDateFormat; import java.util.Date; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.*; import org.apache.hadoop.fs.permission.FsPermission; public class Action { public static void main(String[] args) { Action action = new Action(); action.init(); try { //查看根目录信息 FileStatus[] fileStatuses = action.fs.listStatus(new Path("/data/infos/")); for (FileStatus file:fileStatuses) { if(file.isFile()){ System.out.println("文件"+file.getPath().getName()); }else{ System.out.println("文件夹"+file.getPath().getName()); } } } catch (IOException e) { e.printStackTrace(); } } Configuration conf = null; FileSystem fs = null; public void init() { conf = new Configuration(); try { fs = FileSystem.get(conf); } catch (Exception e) { e.printStackTrace(); } } }

效果:

获取所有节点信息(win系统上看不到)

import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem import org.apache.hadoop.hdfs.DistributedFileSystem; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; public class Action { public static void main(String[] args) { Action action = new Action(); action.init(); try { DistributedFileSystem distributedFileSystem = (DistributedFileSystem) action.fs; DatanodeInfo[] datanodeInfos = distributedFileSystem.getDataNodeStats(); for (DatanodeInfo datanodeInfo : datanodeInfos) { System.out.println(datanodeInfo.getHostName()); } } catch (IOException e) { e.printStackTrace(); } } Configuration conf = null; FileSystem fs = null; public void init() { conf = new Configuration(); try { fs = FileSystem.get(conf); } catch (Exception e) { e.printStackTrace(); } } }

HDFS 的设计特点

能够存储超大文件

流式数据访问

商用硬件

不能处理低时间延迟的数据访问

不能存放大量小文件

无法高效实现多用户写入或者任意修改文件

Hadoop 分布式

版权声明:本文内容由网络用户投稿,版权归原作者所有,本站不拥有其著作权,亦不承担相应法律责任。如果您发现本站中有涉嫌抄袭或描述失实的内容,请联系我们jiasou666@gmail.com 处理,核实后本网站将在24小时内删除侵权内容。

上一篇:【云小课】应用平台第34课 如何配置Kafka监控?
下一篇:Unity -- AssetBundle简介
相关文章