Sei sulla pagina 1 di 3

pammidash@gmail.com
Pd754246
http://192.168.131.128
https://www.youtube.com/watch?v=aQNqqLhyz7A&index=3&list=PLf0swTFhTI8q5X0ZBeUaK1yl_TP-tJInC&spfreload=5

http://www.cloudera.com/downloads/quickstart_vms/5-5.html
=========== Creating MySQL database on Hortonworks sandbox ===================
# Create the retail_db database and a retail_dba account on the sandbox MySQL server.
# Lines starting with "mysql" are shell commands; the rest are typed inside the
# MySQL client session opened by the preceding shell command.

# Step 1 (shell): open a MySQL client session as root.
mysql -u root

# Step 2 (inside the root session): create the database and the account,
# then give the account full rights on retail_db only.
CREATE DATABASE retail_db;
CREATE USER 'retail_dba' IDENTIFIED BY 'hadoop';
GRANT ALL ON retail_db.* TO retail_dba;
FLUSH PRIVILEGES;

# Step 3 (shell, after leaving the root session): log in as the new account.
# -p makes the client prompt for the password.
mysql -u retail_dba -p

# Step 4 (inside the retail_dba session): check that retail_db is listed and selectable.
SHOW DATABASES;
USE retail_db;
# Stage the Twitter input files in HDFS under /user/flume/twitter_upload.
# NOTE(review): the tweets_raw table in these notes reads from
# /user/flume/twitter_upload/data/tweets_raw, but the .gz files are put into
# /user/flume/twitter_upload/ - confirm they end up where the table expects them.
hadoop fs -put /tmp/*.gz /user/flume/twitter_upload/
hadoop fs -put /tmp/dictionary.tsv /user/flume/twitter_upload/data/dictionary
# The target path must stay on one line; splitting it uploads to ".../time_zone_"
# and leaves "map" to be run as a separate command.
hadoop fs -put /tmp/time_zone_map.tsv /user/flume/twitter_upload/data/time_zone_map

# Copy the JSON SerDe jar from HDFS into the current local directory.
hadoop fs -copyToLocal /user/flume/json-serde-1.3-jar-with-dependencies.jar .

-- Hive: create the amiya_TWITTER_SENTIMENT database.
-- NOTE(review): no USE statement is visible after this one, so the CREATE TABLE
-- statements that follow run in whichever database is current - confirm.
CREATE DATABASE amiya_TWITTER_SENTIMENT;


-- Hive external table over the raw tweet JSON files under
-- /user/flume/twitter_upload/data/tweets_raw, parsed by the OpenX JSON SerDe.
-- The SerDe class must be available to the Hive session before this runs
-- (presumably via ADD JAR of the json-serde jar - confirm).
-- USER is a reserved keyword from Hive 1.2.0 onwards, so that column is
-- backtick-quoted below. The column name itself is unchanged.
-- NOTE(review): retweeted_status declares a field named users while the
-- top-level column is named user. If the JSON key inside retweeted_status is
-- also user, the users field will always be NULL - check a sample record
-- before renaming, since a rename affects every query that references it.
-- NOTE(review): year, month, day and hour are plain columns here, not
-- partition keys - confirm that is intended.
CREATE EXTERNAL TABLE tweets_raw (
    id BIGINT,
    created_at STRING,
    source STRING,
    favorited BOOLEAN,
    retweet_count INT,
    retweeted_status STRUCT<
        text:STRING,
        users:STRUCT<screen_name:STRING,name:STRING>>,
    entities STRUCT<
        urls:ARRAY<STRUCT<expanded_url:STRING>>,
        user_mentions:ARRAY<STRUCT<screen_name:STRING,name:STRING>>,
        hashtags:ARRAY<STRUCT<text:STRING>>>,
    text STRING,
    `user` STRUCT<
        screen_name:STRING,
        name:STRING,
        friends_count:INT,
        followers_count:INT,
        statuses_count:INT,
        verified:BOOLEAN,
        utc_offset:STRING, -- was INT but nulls are strings
        time_zone:STRING>,
    in_reply_to_screen_name STRING,
    year INT,
    month INT,
    day INT,
    hour INT
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES ("ignore.malformed.json" = "true")
LOCATION '/user/flume/twitter_upload/data/tweets_raw'
;
-- Hive external table over tab-separated text files stored under
-- /user/flume/twitter_upload/data/dictionary (the upload target used for
-- dictionary.tsv earlier in these notes).
CREATE EXTERNAL TABLE dictionary (
    type     STRING,
    length   INT,
    word     STRING,
    pos      STRING,
    stemmed  STRING,
    polarity STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE
LOCATION '/user/flume/twitter_upload/data/dictionary';

-- Hive external table over tab-separated text files stored under
-- /user/flume/twitter_upload/data/time_zone_map.
-- Each row carries a time_zone, a country and free-form notes, all as strings.
CREATE EXTERNAL TABLE time_zone_map (
    time_zone STRING,
    country   STRING,
    notes     STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE
LOCATION '/user/flume/twitter_upload/data/time_zone_map';
# List the databases on the sandbox MySQL server, connecting as retail_dba.
sqoop list-databases \
    --username retail_dba \
    --password hadoop \
    --connect "jdbc:mysql://sandbox.hortonworks.com:3306"
# List the tables in the retail_db MySQL database, connecting as retail_dba.
sqoop list-tables \
    --username retail_dba \
    --password hadoop \
    --connect "jdbc:mysql://sandbox.hortonworks.com:3306/retail_db"
# List the tables in the Oracle database (SID PROD) at 192.168.0.185, connecting
# as apps through the explicitly named Oracle JDBC driver.
# Every continued line must be followed directly by the next argument: a blank
# line after a trailing backslash ends the command and drops --password.
# NOTE(review): Sqoop warns that --password on the command line is insecure.
# -P (prompt) or --password-file would keep the password out of shell history.
sqoop list-tables \
    --driver oracle.jdbc.driver.OracleDriver \
    --connect jdbc:oracle:thin:@192.168.0.185:1521:PROD \
    --username apps \
    --password mastermind1
# Run a row count against order_items in retail_db and print the result.
sqoop eval \
    --username retail_dba \
    --password hadoop \
    --connect "jdbc:mysql://sandbox.hortonworks.com:3306/retail_db" \
    --query "select count(1) from order_items"
# Import every table of retail_db into HDFS as Avro data files, using 12 map
# tasks, with the output placed under the retail_stage.db warehouse directory.
sqoop import-all-tables \
    --connect "jdbc:mysql://sandbox.hortonworks.com:3306/retail_db" \
    --username retail_dba \
    --password hadoop \
    --num-mappers 12 \
    --as-avrodatafile \
    --warehouse-dir /apps/hive/warehouse/retail_stage.db

Potrebbero piacerti anche